From ecb376c34b17b7e009a4f0084a231076f25a2570 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 12 Feb 2017 17:40:20 +0000 Subject: [PATCH 1/3] Fixed bug in GridInterface multi-dimensional groupby --- holoviews/core/data/grid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 4439f32402..9daa954a29 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -189,11 +189,11 @@ def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs): group_kwargs.update(kwargs) # Find all the keys along supplied dimensions - keys = [dataset.data[d.name] for d in dimensions] + keys = [dataset.dimension_values(d.name, False) for d in dimensions] # Iterate over the unique entries applying selection masks grouped_data = [] - for unique_key in zip(*util.cartesian_product(keys)): + for unique_key in zip(*(arr.flat for arr in util.cartesian_product(keys))): group_data = cls.select(dataset, **dict(zip(dim_names, unique_key))) if np.isscalar(group_data): group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)} From c4d3a55aa0c200715a946850955df6dc3951a87a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 12 Feb 2017 17:48:51 +0000 Subject: [PATCH 2/3] Added unit test for multi-dimensional groupby on grid interfaces --- tests/testdataset.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testdataset.py b/tests/testdataset.py index 2e868e6068..dd8f77c4cc 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -3,6 +3,8 @@ """ from unittest import SkipTest +from itertools import product + import numpy as np from holoviews import Dataset, NdElement, HoloMap, Dimension from holoviews.element.comparison import ComparisonTestCase @@ -833,6 +835,15 @@ def test_dataset_groupby_dynamic_alias(self): kdims=[('y', 'Y')], vdims=[('z', 'Z')]) self.assertEqual(grouped[0], first) + def test_dataset_groupby_multiple_dims(self): + dataset 
= Dataset((range(8), range(8), range(8), range(8), + np.random.rand(8, 8, 8, 8)), + kdims=['a', 'b', 'c', 'd'], vdims=['Value']) + grouped = dataset.groupby(['c', 'd']) + keys = list(product(range(8), range(8))) + self.assertEqual(list(grouped.keys()), keys) + for c, d in keys: + self.assertEqual(grouped[c, d], dataset.select(c=c, d=d).reindex(['a', 'b'])) class IrisDatasetTest(GridDatasetTest): From ade9734456ab007326f46a81c7d19c6a96caf9a7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 12 Feb 2017 18:40:46 +0000 Subject: [PATCH 3/3] Made cartesian_product utility more versatile --- holoviews/core/data/grid.py | 4 ++-- holoviews/core/data/xarray.py | 2 +- holoviews/core/util.py | 16 ++++++++++------ holoviews/element/util.py | 4 ++-- holoviews/plotting/bokeh/raster.py | 7 +++---- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 9daa954a29..4439f32402 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -189,11 +189,11 @@ def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs): group_kwargs.update(kwargs) # Find all the keys along supplied dimensions - keys = [dataset.dimension_values(d.name, False) for d in dimensions] + keys = [dataset.data[d.name] for d in dimensions] # Iterate over the unique entries applying selection masks grouped_data = [] - for unique_key in zip(*(arr.flat for arr in util.cartesian_product(keys))): + for unique_key in zip(*util.cartesian_product(keys)): group_data = cls.select(dataset, **dict(zip(dim_names, unique_key))) if np.isscalar(group_data): group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)} diff --git a/holoviews/core/data/xarray.py b/holoviews/core/data/xarray.py index 739aaf0ebc..eb8a945d8c 100644 --- a/holoviews/core/data/xarray.py +++ b/holoviews/core/data/xarray.py @@ -114,7 +114,7 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): 
dataset.data.groupby(index_dims[0].name)] else: unique_iters = [cls.values(dataset, d, False) for d in group_by] - indexes = zip(*[vals.flat for vals in util.cartesian_product(unique_iters)]) + indexes = zip(*util.cartesian_product(unique_iters)) data = [(k, group_type(dataset.data.sel(**dict(zip(group_by, k))), **group_kwargs)) for k in indexes] diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 1800d45811..8011bdc877 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -1057,13 +1057,17 @@ def groupby_python(self_or_cls, ndmapping, dimensions, container_type, return container_type(groups, kdims=dimensions) -def cartesian_product(arrays): +def cartesian_product(arrays, flat=True, copy=False): """ - Computes the cartesian product of a list of 1D arrays - returning arrays matching the shape defined by all - supplied dimensions. + Efficient cartesian product of a list of 1D arrays returning the + expanded array views for each dimension. By default arrays are + flattened, which may be controlled with the flat flag. The array + views can be turned into regular arrays with the copy flag. 
""" - return np.broadcast_arrays(*np.ix_(*arrays)) + arrays = np.broadcast_arrays(*np.ix_(*arrays)) + if flat: + return tuple(arr.flatten() if copy else arr.flat for arr in arrays) + return tuple(arr.copy() if copy else arr for arr in arrays) def arglexsort(arrays): @@ -1117,7 +1121,7 @@ def expand_grid_coords(dataset, dim): arrays = [dataset.interface.coords(dataset, d.name, True) for d in dataset.kdims] idx = dataset.get_dimension_index(dim) - return cartesian_product(arrays)[idx] + return cartesian_product(arrays, flat=False)[idx] def dt64_to_dt(dt64): diff --git a/holoviews/element/util.py b/holoviews/element/util.py index 680751a8ca..453e0e513e 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -125,8 +125,8 @@ def _aggregate_dataset(self, obj, xcoords, ycoords): shape = (len(ycoords), len(xcoords)) nsamples = np.product(shape) - ys, xs = cartesian_product([ycoords, xcoords]) - data = {xdim: xs.flatten(), ydim: ys.flatten()} + ys, xs = cartesian_product([ycoords, xcoords], copy=True) + data = {xdim: xs, ydim: ys} for vdim in vdims: values = np.empty(nsamples) values[:] = np.NaN diff --git a/holoviews/plotting/bokeh/raster.py b/holoviews/plotting/bokeh/raster.py index a084070978..a119d55210 100644 --- a/holoviews/plotting/bokeh/raster.py +++ b/holoviews/plotting/bokeh/raster.py @@ -189,10 +189,9 @@ def get_data(self, element, ranges=None, empty=False): yvals = element.dimension_values(1, False) widths = np.diff(element.data[0]) heights = np.diff(element.data[1]) - xs, ys = cartesian_product([xvals, yvals]) - ws, hs = cartesian_product([widths, heights]) - data = {x: xs.flatten(), y: ys.flatten(), z: zvals, - 'widths': ws.flatten(), 'heights': hs.flatten()} + xs, ys = cartesian_product([xvals, yvals], copy=True) + ws, hs = cartesian_product([widths, heights], copy=True) + data = {x: xs, y: ys, z: zvals, 'widths': ws, 'heights': hs} return (data, {'x': x, 'y': y, 'fill_color': {'field': z, 'transform': cmapper},