From ecb376c34b17b7e009a4f0084a231076f25a2570 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 12 Feb 2017 17:40:20 +0000
Subject: [PATCH 1/3] Fixed bug in GridInterface multi-dimensional groupby
---
holoviews/core/data/grid.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
index 4439f32402..9daa954a29 100644
--- a/holoviews/core/data/grid.py
+++ b/holoviews/core/data/grid.py
@@ -189,11 +189,11 @@ def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs):
group_kwargs.update(kwargs)
# Find all the keys along supplied dimensions
- keys = [dataset.data[d.name] for d in dimensions]
+ keys = [dataset.dimension_values(d.name, False) for d in dimensions]
# Iterate over the unique entries applying selection masks
grouped_data = []
- for unique_key in zip(*util.cartesian_product(keys)):
+ for unique_key in zip(*(arr.flat for arr in util.cartesian_product(keys))):
group_data = cls.select(dataset, **dict(zip(dim_names, unique_key)))
if np.isscalar(group_data):
group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)}
From c4d3a55aa0c200715a946850955df6dc3951a87a Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 12 Feb 2017 17:48:51 +0000
Subject: [PATCH 2/3] Added unit test for multi-dimensional groupby on grid
interfaces
---
tests/testdataset.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/tests/testdataset.py b/tests/testdataset.py
index 2e868e6068..dd8f77c4cc 100644
--- a/tests/testdataset.py
+++ b/tests/testdataset.py
@@ -3,6 +3,8 @@
"""
from unittest import SkipTest
+from itertools import product
+
import numpy as np
from holoviews import Dataset, NdElement, HoloMap, Dimension
from holoviews.element.comparison import ComparisonTestCase
@@ -833,6 +835,15 @@ def test_dataset_groupby_dynamic_alias(self):
kdims=[('y', 'Y')], vdims=[('z', 'Z')])
self.assertEqual(grouped[0], first)
+ def test_dataset_groupby_multiple_dims(self):
+ dataset = Dataset((range(8), range(8), range(8), range(8),
+ np.random.rand(8, 8, 8, 8)),
+ kdims=['a', 'b', 'c', 'd'], vdims=['Value'])
+ grouped = dataset.groupby(['c', 'd'])
+ keys = list(product(range(8), range(8)))
+ self.assertEqual(list(grouped.keys()), keys)
+ for c, d in keys:
+ self.assertEqual(grouped[c, d], dataset.select(c=c, d=d).reindex(['a', 'b']))
class IrisDatasetTest(GridDatasetTest):
From ade9734456ab007326f46a81c7d19c6a96caf9a7 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 12 Feb 2017 18:40:46 +0000
Subject: [PATCH 3/3] Made cartesian_product utility more versatile
---
holoviews/core/data/grid.py | 4 ++--
holoviews/core/data/xarray.py | 2 +-
holoviews/core/util.py | 16 ++++++++++------
holoviews/element/util.py | 4 ++--
holoviews/plotting/bokeh/raster.py | 7 +++----
5 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
index 9daa954a29..4439f32402 100644
--- a/holoviews/core/data/grid.py
+++ b/holoviews/core/data/grid.py
@@ -189,11 +189,11 @@ def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs):
group_kwargs.update(kwargs)
# Find all the keys along supplied dimensions
- keys = [dataset.dimension_values(d.name, False) for d in dimensions]
+ keys = [dataset.data[d.name] for d in dimensions]
# Iterate over the unique entries applying selection masks
grouped_data = []
- for unique_key in zip(*(arr.flat for arr in util.cartesian_product(keys))):
+ for unique_key in zip(*util.cartesian_product(keys)):
group_data = cls.select(dataset, **dict(zip(dim_names, unique_key)))
if np.isscalar(group_data):
group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)}
diff --git a/holoviews/core/data/xarray.py b/holoviews/core/data/xarray.py
index 739aaf0ebc..eb8a945d8c 100644
--- a/holoviews/core/data/xarray.py
+++ b/holoviews/core/data/xarray.py
@@ -114,7 +114,7 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
dataset.data.groupby(index_dims[0].name)]
else:
unique_iters = [cls.values(dataset, d, False) for d in group_by]
- indexes = zip(*[vals.flat for vals in util.cartesian_product(unique_iters)])
+ indexes = zip(*util.cartesian_product(unique_iters))
data = [(k, group_type(dataset.data.sel(**dict(zip(group_by, k))),
**group_kwargs))
for k in indexes]
diff --git a/holoviews/core/util.py b/holoviews/core/util.py
index 1800d45811..8011bdc877 100644
--- a/holoviews/core/util.py
+++ b/holoviews/core/util.py
@@ -1057,13 +1057,17 @@ def groupby_python(self_or_cls, ndmapping, dimensions, container_type,
return container_type(groups, kdims=dimensions)
-def cartesian_product(arrays):
+def cartesian_product(arrays, flat=True, copy=False):
"""
- Computes the cartesian product of a list of 1D arrays
- returning arrays matching the shape defined by all
- supplied dimensions.
+ Efficient cartesian product of a list of 1D arrays returning the
+ expanded array views for each dimension. By default the views are
+ flattened (as flat iterators), which may be controlled with the flat
+ flag. The views can be turned into regular arrays with the copy flag.
"""
- return np.broadcast_arrays(*np.ix_(*arrays))
+ arrays = np.broadcast_arrays(*np.ix_(*arrays))
+ if flat:
+ return tuple(arr.flatten() if copy else arr.flat for arr in arrays)
+ return tuple(arr.copy() if copy else arr for arr in arrays)
def arglexsort(arrays):
@@ -1117,7 +1121,7 @@ def expand_grid_coords(dataset, dim):
arrays = [dataset.interface.coords(dataset, d.name, True)
for d in dataset.kdims]
idx = dataset.get_dimension_index(dim)
- return cartesian_product(arrays)[idx]
+ return cartesian_product(arrays, flat=False)[idx]
def dt64_to_dt(dt64):
diff --git a/holoviews/element/util.py b/holoviews/element/util.py
index 680751a8ca..453e0e513e 100644
--- a/holoviews/element/util.py
+++ b/holoviews/element/util.py
@@ -125,8 +125,8 @@ def _aggregate_dataset(self, obj, xcoords, ycoords):
shape = (len(ycoords), len(xcoords))
nsamples = np.product(shape)
- ys, xs = cartesian_product([ycoords, xcoords])
- data = {xdim: xs.flatten(), ydim: ys.flatten()}
+ ys, xs = cartesian_product([ycoords, xcoords], copy=True)
+ data = {xdim: xs, ydim: ys}
for vdim in vdims:
values = np.empty(nsamples)
values[:] = np.NaN
diff --git a/holoviews/plotting/bokeh/raster.py b/holoviews/plotting/bokeh/raster.py
index a084070978..a119d55210 100644
--- a/holoviews/plotting/bokeh/raster.py
+++ b/holoviews/plotting/bokeh/raster.py
@@ -189,10 +189,9 @@ def get_data(self, element, ranges=None, empty=False):
yvals = element.dimension_values(1, False)
widths = np.diff(element.data[0])
heights = np.diff(element.data[1])
- xs, ys = cartesian_product([xvals, yvals])
- ws, hs = cartesian_product([widths, heights])
- data = {x: xs.flatten(), y: ys.flatten(), z: zvals,
- 'widths': ws.flatten(), 'heights': hs.flatten()}
+ xs, ys = cartesian_product([xvals, yvals], copy=True)
+ ws, hs = cartesian_product([widths, heights], copy=True)
+ data = {x: xs, y: ys, z: zvals, 'widths': ws, 'heights': hs}
return (data, {'x': x, 'y': y,
'fill_color': {'field': z, 'transform': cmapper},