diff --git a/doc/Tutorials/Introduction.ipynb b/doc/Tutorials/Introduction.ipynb index 237b10a1a9..38e14b72cc 100644 --- a/doc/Tutorials/Introduction.ipynb +++ b/doc/Tutorials/Introduction.ipynb @@ -550,7 +550,7 @@ "source": [ "print(rgb_parrot)\n", "print(rgb_parrot[0,0])\n", - "print(rgb_parrot[0,0][0])" + "print(rgb_parrot[0,0].iloc[0, 0])" ] }, { diff --git a/examples/streams/bokeh/point_selection1D.ipynb b/examples/streams/bokeh/point_selection1D.ipynb index 33f092775f..1e1b84e332 100644 --- a/examples/streams/bokeh/point_selection1D.ipynb +++ b/examples/streams/bokeh/point_selection1D.ipynb @@ -42,12 +42,12 @@ "\n", "# Write function that uses the selection indices to slice points and compute stats\n", "def selected_info(index):\n", - " arr = points.array()[index]\n", + " selected = points.iloc[index]\n", " if index:\n", - " label = 'Mean x, y: %.3f, %.3f' % tuple(arr.mean(axis=0))\n", + " label = 'Mean x, y: %.3f, %.3f' % tuple(selected.array().mean(axis=0))\n", " else:\n", " label = 'No selection'\n", - " return points.clone(arr, label=label)(style=dict(color='red'))\n", + " return selected.relabel(label)(style=dict(color='red'))\n", "\n", "# Combine points and DynamicMap\n", "points + hv.DynamicMap(selected_info, streams=[selection])" diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index b7baec5abe..4944bb4780 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -9,7 +9,7 @@ import param from ..dimension import redim -from .interface import Interface +from .interface import Interface, iloc, ndloc from .array import ArrayInterface from .dictionary import DictInterface from .grid import GridInterface @@ -433,7 +433,9 @@ def sample(self, samples=[], closest=True, **kwargs): else: selection = tuple(selection.columns(kdims+self.vdims).values()) - return self.clone(selection, kdims=kdims, new_type=new_type) + datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict'])) + return self.clone(selection, kdims=kdims, new_type=new_type, + datatype=datatype) lens = set(len(util.wrap_tuple(s)) for s in samples) if len(lens) > 1: @@ -624,6 +626,59 @@ def to(self): return self._conversion_interface(self) + @property + def iloc(self): + """ + Returns an iloc object providing a convenient interface to + slice and index into the Dataset using row and column indices. + Allow selection by integer index, slice and list of integer + indices and boolean arrays. + + Examples: + + * Index the first row and column: + + dataset.iloc[0, 0] + + * Select rows 1 and 2 with a slice: + + dataset.iloc[1:3, :] + + * Select with a list of integer coordinates: + + dataset.iloc[[0, 2, 3]] + """ + return iloc(self) + + + @property + def ndloc(self): + """ + Returns an ndloc object providing nd-array like indexing for + gridded datasets. Follows NumPy array indexing conventions, + allowing for indexing, slicing and selecting a list of indices + on multi-dimensional arrays using integer indices. The order + of array indices is inverted relative to the Dataset key + dimensions, e.g. an Image with key dimensions 'x' and 'y' can + be indexed with ``image.ndloc[iy, ix]``, where ``iy`` and + ``ix`` are integer indices along the y and x dimensions. + + Examples: + + * Index value in 2D array: + + dataset.ndloc[3, 1] + + * Slice along y-axis of 2D array: + + dataset.ndloc[2:5, :] + + * Vectorized (non-orthogonal) indexing along x- and y-axes: + + dataset.ndloc[[1, 2, 3], [0, 2, 3]] + """ + return ndloc(self) + # Aliases for pickle backward compatibility Columns = Dataset diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index 69a55c2864..073258a246 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -54,18 +54,19 @@ def init(cls, eltype, data, kdims, vdims): except: data = None + if kdims is None: + kdims = eltype.kdims + if vdims is None: + vdims = eltype.vdims + if data is None or data.ndim > 2 or data.dtype.kind in ['S', 'U', 'O']: raise ValueError("ArrayInterface interface could not handle input type.") elif data.ndim == 1: - if eltype._auto_indexable_1d: + if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1: data = np.column_stack([np.arange(len(data)), data]) else: data = np.atleast_2d(data).T - if kdims is None: - kdims = eltype.kdims - if vdims is None: - vdims = eltype.vdims return data, {'kdims':kdims, 'vdims':vdims}, {} @classmethod @@ -232,4 +233,23 @@ def aggregate(cls, dataset, dimensions, function, **kwargs): return np.atleast_2d(rows) + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + if np.isscalar(cols): + if isinstance(cols, util.basestring): + cols = dataset.get_dimension_index(cols) + if np.isscalar(rows): + return dataset.data[rows, cols] + cols = [dataset.get_dimension_index(cols)] + elif not isinstance(cols, slice): + cols = [dataset.get_dimension_index(d) for d in cols] + + if np.isscalar(rows): + rows = [rows] + data = dataset.data[rows, :][:, cols] + if data.ndim == 1: + return np.atleast_2d(data).T + return data + Interface.register(ArrayInterface) diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py index 88e4ca21a9..984dbd8efc 100644 --- a/holoviews/core/data/dask.py +++ b/holoviews/core/data/dask.py @@ -12,7 +12,7 @@ from .. import util from ..element import Element -from ..ndmapping import NdMapping, item_check +from ..ndmapping import NdMapping, item_check, OrderedDict from .interface import Interface from .pandas import PandasInterface @@ -241,6 +241,30 @@ def dframe(cls, columns, dimensions): def nonzero(cls, dataset): return True + @classmethod + def iloc(cls, dataset, index): + """ + Dask does not support iloc, therefore iloc will execute + the call graph and lose the laziness of the operation. + """ + rows, cols = index + scalar = False + if isinstance(cols, slice): + cols = [d.name for d in dataset.dimensions()][cols] + elif np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols).name] + else: + cols = [dataset.get_dimension(d).name for d in index[1]] + if np.isscalar(rows): + rows = [rows] + + data = OrderedDict() + for c in cols: + data[c] = dataset.data[c].compute().iloc[rows].values + if scalar: + return data[cols[0]][0] + return tuple(data.values()) Interface.register(DaskInterface) diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 7668f54898..e912aa3a9b 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -49,7 +49,7 @@ def init(cls, eltype, data, kdims, vdims): data = {d: data[d] for d in dimensions} elif isinstance(data, np.ndarray): if data.ndim == 1: - if eltype._auto_indexable_1d: + if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1: data = np.column_stack([np.arange(len(data)), data]) else: data = np.atleast_2d(data).T @@ -261,4 +261,29 @@ def aggregate(cls, dataset, kdims, function, **kwargs): return aggregated + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + scalar = False + if np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols, strict=True)] + elif isinstance(cols, slice): + cols = dataset.dimensions()[cols] + else: + cols = [dataset.get_dimension(d, strict=True) for d in cols] + + if np.isscalar(rows): + rows = [rows] + + new_data = OrderedDict() + for d, values in dataset.data.items(): + if d in cols: + new_data[d] = values[rows] + + if scalar: + return new_data[cols[0].name][0] + return new_data + + Interface.register(DictInterface) diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 45fecb8d5b..a66949aaa8 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -1,4 +1,4 @@ -from collections import OrderedDict, defaultdict +from collections import OrderedDict, defaultdict, Iterable try: import itertools.izip as zip @@ -167,6 +167,53 @@ def canonicalize(cls, dataset, data, coord_dims=None): return data + @classmethod + def invert_index(cls, index, length): + if np.isscalar(index): + return length - index + elif isinstance(index, slice): + start, stop = index.start, index.stop + new_start, new_stop = None, None + if start is not None: + new_stop = length - start + if stop is not None: + new_start = length - stop + return slice(new_start-1, new_stop-1) + elif isinstance(index, Iterable): + new_index = [] + for ind in index: + new_index.append(length-ind) + return new_index + + + @classmethod + def ndloc(cls, dataset, indices): + selected = {} + adjusted_inds = [] + all_scalar = True + for kd, ind in zip(dataset.kdims[::-1], indices): + coords = cls.coords(dataset, kd.name) + if np.all(coords[1:] < coords[:-1]): + ind = cls.invert_index(ind, len(coords)) + if np.isscalar(ind): + ind = [ind] + else: + all_scalar = False + selected[kd.name] = coords[ind] + adjusted_inds.append(ind) + for kd in dataset.kdims: + if kd.name not in selected: + coords = cls.coords(dataset, kd.name) + selected[kd.name] = coords + all_scalar = False + for vd in dataset.vdims: + arr = dataset.dimension_values(vd, flat=False) + if all_scalar and len(dataset.vdims) == 1: + return arr[tuple(ind[0] for ind in adjusted_inds)] + selected[vd.name] = arr[tuple(adjusted_inds)] + return tuple(selected[d.name] for d in dataset.dimensions()) + + @classmethod def values(cls, dataset, dim, expanded=True, flat=True): dim = dataset.get_dimension(dim, strict=True) @@ -391,5 +438,28 @@ def sort(cls, dataset, by=[]): raise Exception('Compressed format cannot be sorted, either instantiate ' 'in the desired order or use the expanded format.') + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + scalar = False + if np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols, strict=True)] + elif isinstance(cols, slice): + cols = dataset.dimensions()[cols] + else: + cols = [dataset.get_dimension(d, strict=True) for d in cols] + + if np.isscalar(rows): + rows = [rows] + + new_data = [] + for d in cols: + new_data.append(dataset.dimension_values(d)[rows]) + + if scalar: + return new_data[0][0] + return tuple(new_data) + Interface.register(GridInterface) diff --git a/holoviews/core/data/image.py b/holoviews/core/data/image.py index 81a8bbb837..70ac560ffd 100644 --- a/holoviews/core/data/image.py +++ b/holoviews/core/data/image.py @@ -89,6 +89,13 @@ def reindex(cls, dataset, kdims=None, vdims=None): return data[..., inds] if len(inds) > 1 else data[..., inds[0]] return data + @classmethod + def coords(cls, dataset, dim, ordered=False, expanded=False): + dim = dataset.get_dimension(dim, strict=True) + if expanded: + return util.expand_grid_coords(dataset, dim) + return cls.values(dataset, dim, expanded=False) + @classmethod def range(cls, obj, dim): dim_idx = obj.get_dimension_index(dim) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 6b7d528da3..8694e95260 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -6,6 +6,80 @@ from .. import util +class iloc(object): + """ + iloc is small wrapper object that allows row, column based + indexing into a Dataset using the ``.iloc`` property. It supports + the usual numpy and pandas iloc indexing semantics including + integer indices, slices, lists and arrays of values. For more + information see the ``Dataset.iloc`` property docstring. + """ + + def __init__(self, dataset): + self.dataset = dataset + + def __getitem__(self, index): + index = util.wrap_tuple(index) + if len(index) == 1: + index = (index[0], slice(None)) + elif len(index) > 2: + raise IndexError('Tabular index not understood, index ' + 'must be at most length 2.') + + rows, cols = index + if rows is Ellipsis: + rows = slice(None) + data = self.dataset.interface.iloc(self.dataset, (rows, cols)) + kdims = self.dataset.kdims + vdims = self.dataset.vdims + if np.isscalar(data): + return data + elif cols == slice(None): + pass + else: + if isinstance(cols, slice): + dims = self.dataset.dimensions()[index[1]] + elif np.isscalar(cols): + dims = [self.dataset.get_dimension(cols)] + else: + dims = [self.dataset.get_dimension(d) for d in cols] + kdims = [d for d in dims if d in kdims] + vdims = [d for d in dims if d in vdims] + + datatype = [dt for dt in self.dataset.datatype + if dt in Interface.interfaces and + not Interface.interfaces[dt].gridded] + if not datatype: datatype = ['dataframe', 'dictionary'] + return self.dataset.clone(data, kdims=kdims, vdims=vdims, + datatype=datatype) + + +class ndloc(object): + """ + ndloc is a small wrapper object that allows ndarray-like indexing + for gridded Datasets using the ``.ndloc`` property. It supports + the standard NumPy ndarray indexing semantics including + integer indices, slices, lists and arrays of values. For more + information see the ``Dataset.ndloc`` property docstring. + """ + + def __init__(self, dataset): + self.dataset = dataset + + def __getitem__(self, indices): + ds = self.dataset + indices = util.wrap_tuple(indices) + if not ds.interface.gridded: + raise IndexError('Cannot use ndloc on non nd-dimensional datastructure') + selected = self.dataset.interface.ndloc(ds, indices) + if np.isscalar(selected): + return selected + params = {} + if hasattr(ds, 'bounds'): + params['bounds'] = None + return self.dataset.clone(selected, datatype=[ds.interface.datatype]+ds.datatype, **params) + + class Interface(param.Parameterized): interfaces = {} diff --git a/holoviews/core/data/ndelement.py b/holoviews/core/data/ndelement.py index 066b0dba34..dcc6e03bf4 100644 --- a/holoviews/core/data/ndelement.py +++ b/holoviews/core/data/ndelement.py @@ -8,7 +8,7 @@ from .interface import Interface from ..dimension import Dimension, Dimensioned from ..element import NdElement -from ..ndmapping import item_check +from ..ndmapping import item_check, OrderedDict from .. import util @@ -141,5 +141,30 @@ def unpack_scalar(cls, columns, data): else: return data + @classmethod + def iloc(cls, dataset, index): + data = dataset.columns() + rows, cols = index + scalar = False + if np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols, strict=True)] + elif isinstance(cols, slice): + cols = dataset.dimensions()[cols] + else: + cols = [dataset.get_dimension(d, strict=True) for d in cols] + + if np.isscalar(rows): + rows = [rows] + + new_data = OrderedDict() + for d, values in data.items(): + if d in cols: + new_data[d] = values[rows] + + if scalar: + return new_data[cols[0].name][0] + return new_data + Interface.register(NdElementInterface) diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index cee7e6e2fe..c49d6ac01a 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -61,8 +61,8 @@ def init(cls, eltype, data, kdims, vdims): data = cyODict(((c, col) for c, col in zip(columns, column_data))) elif isinstance(data, np.ndarray): if data.ndim == 1: - if eltype._auto_indexable_1d: - data = (range(len(data)), data) + if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1: + data = (np.arange(len(data)), data) else: data = np.atleast_2d(data).T else: @@ -252,4 +252,25 @@ def dframe(cls, columns, dimensions): return columns.data.copy() + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + scalar = False + columns = list(dataset.data.columns) + if isinstance(cols, slice): + cols = [d.name for d in dataset.dimensions()][cols] + elif np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols).name] + else: + cols = [dataset.get_dimension(d).name for d in index[1]] + cols = [columns.index(c) for c in cols] + if np.isscalar(rows): + rows = [rows] + + if scalar: + return dataset.data.iloc[rows[0], cols[0]] + return dataset.data.iloc[rows, cols] + + Interface.register(PandasInterface) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 55bfd64f52..1e424c5375 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -228,8 +228,7 @@ def pprint_cell(self, row, col): return self.kdims[col].pprint_label else: dim = self.get_dimension(col) - values = self[dim.name] - return dim.pprint_value(values[row-1]) + return dim.pprint_value(self.iloc[row-1, col]) def cell_type(self, row, col): diff --git a/holoviews/core/util.py b/holoviews/core/util.py index e8770dd233..b93f89e1da 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -1449,7 +1449,7 @@ def bound_range(vals, density): """ low, high = vals.min(), vals.max() invert = False - if vals[0] > vals[1]: + if len(vals) > 1 and vals[0] > vals[1]: invert = True if not density: density = round(1./((high-low)/(len(vals)-1)), sys.float_info.dig) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index d629a3fc57..cb07d6ebbd 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -7,7 +7,7 @@ from ..core.data import ImageInterface from ..core import Dimension, Element2D, Overlay, Dataset from ..core.boundingregion import BoundingRegion, BoundingBox -from ..core.sheetcoords import SheetCoordinateSystem +from ..core.sheetcoords import SheetCoordinateSystem, Slice from ..core.util import max_range from .chart import Curve from .tabular import Table @@ -298,26 +298,79 @@ def select(self, selection_specs=None, **selection): coords = tuple(selection[kd.name] if kd.name in selection else slice(None) for kd in self.kdims) + shape = self.interface.shape(self, gridded=True) if any([isinstance(el, slice) for el in coords]): - shape = self.interface.shape(self, gridded=True) bounds = compute_slice_bounds(coords, self, shape[:2]) xdim, ydim = self.kdims l, b, r, t = bounds.lbrt() - selection = {xdim.name: slice(l, r), ydim.name: slice(b, t)} - else: - selection = {kd.name: c for kd, c in zip(self.kdims, self.closest(coords))} - data = self.interface.select(self, **selection) - if isinstance(data, np.ndarray) and data.ndim == 1: - return self.clone([tuple(data)], kdims=[], new_type=Dataset) - elif np.isscalar(data): - return data + # Situate resampled region into overall slice + y0, y1, x0, x1 = Slice(bounds, self) + y0, y1 = shape[0]-y1, shape[0]-y0 + selection = (slice(y0, y1), slice(x0, x1)) + sliced = True + else: + y, x = self.sheet2matrixidx(coords[0], coords[1]) + y = shape[0]-y-1 + selection = (y, x) + sliced = False + + data = self.interface.ndloc(self, selection) + if not sliced: + if np.isscalar(data): + return data + return self.clone(data[self.ndims:], kdims=[], new_type=Dataset) else: return self.clone(data, xdensity=self.xdensity, ydensity=self.ydensity, bounds=bounds) + def sample(self, samples=[], **kwargs): + """ + Allows sampling of an Image as an iterator of coordinates + matching the key dimensions, returning a new object containing + just the selected samples. Alternatively may supply kwargs to + sample a coordinate on an object. On an Image the coordinates + are continuously indexed and will always snap to the nearest + coordinate. + """ + kwargs = {k: v for k, v in kwargs.items() if k != 'closest'} + if kwargs and samples: + raise Exception('Supply explicit list of samples or kwargs, not both.') + elif kwargs: + sample = [slice(None) for _ in range(self.ndims)] + for dim, val in kwargs.items(): + sample[self.get_dimension_index(dim)] = val + samples = [tuple(sample)] + + # If a 1D cross-section of 2D space return Curve + shape = self.interface.shape(self, gridded=True) + if len(samples) == 1: + dims = [kd for kd, v in zip(self.kdims, samples[0]) if not np.isscalar(v)] + if len(dims) == 1: + kdims = [self.get_dimension(kd) for kd in dims] + sel = {kd.name: s for kd, s in zip(self.kdims, samples[0])} + dims = [kd for kd, v in sel.items() if not np.isscalar(v)] + selection = self.select(**sel) + selection = tuple(selection.columns(kdims+self.vdims).values()) + datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict'])) + return self.clone(selection, kdims=kdims, new_type=Curve, + datatype=datatype) + else: + new_type = Table + kdims = self.kdims + else: + new_type = Dataset + kdims = self.kdims + + xs, ys = zip(*samples) + yidx, xidx = self.sheet2matrixidx(np.array(xs), np.array(ys)) + yidx = shape[0]-yidx-1 + data = self.interface.ndloc(self, (yidx, xidx)) + return self.clone(data, new_type=Table, datatype=['dataframe', 'dict']) + + def closest(self, coords=[], **kwargs): """ Given a single coordinate or multiple coordinates as diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index 7530791dfe..47bc2d00ae 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -597,17 +597,7 @@ def _process_layer(self, element, key=None): if len(sliced) > self.p.max_samples: prng = np.random.RandomState(self.p.random_seed) - length = len(sliced) - if element.interface is PandasInterface: - data = sliced.data.sample(self.p.max_samples, - random_state=prng) - else: - inds = prng.choice(length, self.p.max_samples, False) - if isinstance(element.interface, DictInterface): - data = {k: v[inds] for k, v in sliced.data.items()} - else: - data = sliced.data[inds, :] - sliced = element.clone(data) + return element.iloc[prng.choice(len(sliced), self.p.max_samples, False)] return sliced def _process(self, element, key=None): diff --git a/tests/testdataset.py b/tests/testdataset.py index 87bb4ee86b..56af76ef4a 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -59,10 +59,10 @@ def setUp(self): self.data_instance_type = None def init_column_data(self): - self.xs = range(11) - self.xs_2 = [el**2 for el in self.xs] + self.xs = np.array(range(11)) + self.xs_2 = self.xs**2 - self.y_ints = [i*2 for i in range(11)] + self.y_ints = self.xs*2 self.dataset_hm = Dataset((self.xs, self.y_ints), kdims=['x'], vdims=['y']) self.dataset_hm_alias = Dataset((self.xs, self.y_ints), @@ -248,6 +248,89 @@ def test_dataset_array_ht(self): self.assertEqual(self.dataset_hm.array(), np.column_stack([self.xs, self.y_ints])) + # Tabular indexing + + def test_dataset_iloc_slice_rows(self): + sliced = self.dataset_hm.iloc[1:4] + table = Dataset({'x': self.xs[1:4], 'y': self.y_ints[1:4]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_slice_cols(self): + sliced = self.dataset_hm.iloc[1:4, 1:] + table = Dataset({'y': self.y_ints[1:4]}, kdims=[], vdims=['y'], + datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_list_cols(self): + sliced = self.dataset_hm.iloc[1:4, [0, 1]] + table = Dataset({'x': self.xs[1:4], 'y': self.y_ints[1:4]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_index_cols(self): + sliced = self.dataset_hm.iloc[1:4, 1] + table = Dataset({'y': self.y_ints[1:4]}, kdims=[], vdims=['y'], + datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows(self): + sliced = self.dataset_hm.iloc[[0, 2]] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols(self): + sliced = self.dataset_hm.iloc[[0, 2], [0, 1]] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols_by_name(self): + sliced = self.dataset_hm.iloc[[0, 2], ['x', 'y']] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_slice_cols(self): + sliced = self.dataset_hm.iloc[[0, 2], slice(0, 2)] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_index_rows_index_cols(self): + indexed = self.dataset_hm.iloc[1, 1] + self.assertEqual(indexed, self.y_ints[1]) + + def test_dataset_iloc_index_rows_slice_cols(self): + indexed = self.dataset_hm.iloc[1, :2] + table = Dataset({'x':self.xs[[1]], 'y':self.y_ints[[1]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(indexed, table) + + def test_dataset_iloc_list_cols(self): + sliced = self.dataset_hm.iloc[:, [0, 1]] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_cols_by_name(self): + sliced = self.dataset_hm.iloc[:, ['x', 'y']] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols(self): + sliced = self.dataset_hm.iloc[..., [0, 1]] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols_by_name(self): + sliced = self.dataset_hm.iloc[..., ['x', 'y']] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) class HeterogeneousColumnTypes(HomogeneousColumnTypes): @@ -258,8 +341,8 @@ class HeterogeneousColumnTypes(HomogeneousColumnTypes): def init_column_data(self): self.kdims = ['Gender', 'Age'] self.vdims = ['Weight', 'Height'] - self.gender, self.age = ['M','M','F'], [10,16,12] - self.weight, self.height = [15,18,10], [0.8,0.6,0.8] + self.gender, self.age = np.array(['M','M','F']), np.array([10,16,12]) + self.weight, self.height = np.array([15,18,10]), np.array([0.8,0.6,0.8]) self.table = Dataset({'Gender':self.gender, 'Age':self.age, 'Weight':self.weight, 'Height':self.height}, kdims=self.kdims, vdims=self.vdims) @@ -551,6 +634,91 @@ def test_dataset_value_dim_scalar_index(self): row = self.table['M', 10, 'Weight'] self.assertEquals(row, 15) + # Tabular indexing + + def test_dataset_iloc_slice_rows(self): + sliced = self.table.iloc[1:2] + table = Dataset({'Gender':self.gender[1:2], 'Age':self.age[1:2], + 'Weight':self.weight[1:2], 'Height':self.height[1:2]}, + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_slice_cols(self): + sliced = self.table.iloc[1:2, 1:3] + table = Dataset({'Age':self.age[1:2], 'Weight':self.weight[1:2]}, + kdims=self.kdims[1:], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_list_cols(self): + sliced = self.table.iloc[1:2, [1, 3]] + table = Dataset({'Age':self.age[1:2], 'Height':self.height[1:2]}, + kdims=self.kdims[1:], vdims=self.vdims[1:]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_index_cols(self): + sliced = self.table.iloc[1:2, 2] + table = Dataset({'Weight':self.weight[1:2]}, kdims=[], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows(self): + sliced = self.table.iloc[[0, 2]] + table = Dataset({'Gender':self.gender[[0, 2]], 'Age':self.age[[0, 2]], + 'Weight':self.weight[[0, 2]], 'Height':self.height[[0, 2]]}, + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols(self): + sliced = self.table.iloc[[0, 2], [0, 2]] + table = Dataset({'Gender':self.gender[[0, 2]], 'Weight':self.weight[[0, 2]]}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols_by_name(self): + sliced = self.table.iloc[[0, 2], ['Gender', 'Weight']] + table = Dataset({'Gender':self.gender[[0, 2]], 'Weight':self.weight[[0, 2]]}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_slice_cols(self): + sliced = self.table.iloc[[0, 2], slice(1, 3)] + table = Dataset({'Age':self.age[[0, 2]], 'Weight':self.weight[[0, 2]]}, + kdims=self.kdims[1:], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_index_rows_index_cols(self): + indexed = self.table.iloc[1, 1] + self.assertEqual(indexed, self.age[1]) + + def test_dataset_iloc_index_rows_slice_cols(self): + indexed = self.table.iloc[1, 1:3] + table = Dataset({'Age':self.age[[1]], 'Weight':self.weight[[1]]}, + kdims=self.kdims[1:], vdims=self.vdims[:1]) + self.assertEqual(indexed, table) + + def test_dataset_iloc_list_cols(self): + sliced = self.table.iloc[:, [0, 2]] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_cols_by_name(self): + sliced = self.table.iloc[:, ['Gender', 'Weight']] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols(self): + sliced = self.table.iloc[..., [0, 2]] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols_by_name(self): + sliced = self.table.iloc[..., ['Gender', 'Weight']] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + # Casting def test_dataset_array_ht(self): @@ -562,6 +730,9 @@ class ArrayDatasetTest(HomogeneousColumnTypes, ComparisonTestCase): """ Test of the ArrayDataset interface. """ + + datatype = 'array' + def setUp(self): self.restore_datatype = Dataset.datatype Dataset.datatype = ['array'] @@ -574,6 +745,8 @@ class DFDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase): Test of the pandas DFDataset interface. """ + datatype = 'dataframe' + def setUp(self): if pd is None: raise SkipTest("Pandas not available") @@ -588,6 +761,8 @@ class DaskDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase): Test of the pandas DaskDataset interface. """ + datatype = 'dask' + def setUp(self): if dd is None: raise SkipTest("dask not available") @@ -630,6 +805,8 @@ class DictDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase): Test of the generic dictionary interface. """ + datatype = 'dictionary' + def setUp(self): self.restore_datatype = Dataset.datatype Dataset.datatype = ['dictionary'] @@ -695,22 +872,22 @@ def setUp(self): self.restore_datatype = Dataset.datatype Dataset.datatype = ['grid'] self.data_instance_type = dict - self.init_data() + self.init_column_data() + self.init_grid_data() def init_column_data(self): - self.xs = range(11) - self.xs_2 = [el**2 for el in self.xs] - - self.y_ints = [i*2 for i in range(11)] + self.xs = np.arange(11) + self.xs_2 = self.xs**2 + self.y_ints = self.xs*2 self.dataset_hm = Dataset((self.xs, self.y_ints), kdims=['x'], vdims=['y']) self.dataset_hm_alias = Dataset((self.xs, self.y_ints), kdims=[('x', 'X')], vdims=[('y', 'Y')]) def init_grid_data(self): - self.grid_xs = [0, 1] - self.grid_ys = [0.1, 0.2, 0.3] - self.grid_zs = [[0, 1], [2, 3], [4, 5]] + self.grid_xs = np.array([0, 1]) + self.grid_ys = np.array([0.1, 0.2, 0.3]) + self.grid_zs = np.array([[0, 1], [2, 3], [4, 5]]) self.dataset_grid = self.eltype((self.grid_xs, self.grid_ys, self.grid_zs), kdims=['x', 'y'], vdims=['z']) @@ -734,6 +911,44 @@ def test_canonical_vdim(self): self.assertEqual(dataset.dimension_values('z', flat=False), canonical) + def test_dataset_ndloc_index(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + self.assertEqual(ds.ndloc[0,0], arr[0, 0]) + + def test_dataset_ndloc_index2(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + self.assertEqual(ds.ndloc[4, 9], arr[4, 9]) + + def test_dataset_ndloc_slice(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + sliced = Dataset((xs[2:5], ys[1:], arr[1:, 2:5]), kdims=['x', 'y'], vdims=['z'], + datatype=[self.datatype]) + self.assertEqual(ds.ndloc[1:, 2:5], sliced) + + def test_dataset_ndloc_lists(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype, 'dictionary']) + sliced = Dataset((xs[[1, 2, 3]], ys[[0, 1, 2]], arr[[0, 1, 2], [1, 2, 3]]), kdims=['x', 'y'], vdims=['z'], + datatype=['dictionary']) + self.assertEqual(ds.ndloc[[0, 1, 2], [1, 2, 3]], sliced) + + def test_dataset_ndloc_slice_two_vdims(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + arr2 = (np.arange(10)*np.arange(5)[np.newaxis].T)[::-1] + ds = Dataset((xs, ys, arr, arr2), kdims=['x', 'y'], vdims=['z', 'z2'], datatype=[self.datatype, 'dictionary']) + sliced = Dataset((xs[[1, 2, 3]], ys[[0, 1, 2]], arr[[0, 1, 2], [1, 2, 3]], + arr2[[0, 1, 2], [1, 2, 3]]), kdims=['x', 'y'], vdims=['z', 'z2'], + datatype=['dictionary']) + self.assertEqual(ds.ndloc[[0, 1, 2], [1, 2, 3]], sliced) + def test_dataset_dim_vals_grid_kdims_xs(self): self.assertEqual(self.dataset_grid.dimension_values(0, expanded=False), np.array([0, 1])) @@ -1071,6 +1286,9 @@ def test_dataset_groupby_drop_dims_with_vdim(self): def test_dataset_groupby_drop_dims_dynamic_with_vdim(self): raise SkipTest("Not supported") + def test_dataset_ndloc_slice_two_vdims(self): + raise SkipTest("Not supported") + @attr(optional=1) class XArrayDatasetTest(GridDatasetTest): @@ -1132,10 +1350,10 @@ class XArrayDaskArrayDatasetTest(XArrayDatasetTest): def init_column_data(self): import dask.array - self.xs = range(11) - self.xs_2 = [el**2 for el in self.xs] + self.xs = np.array(range(11)) + self.xs_2 = self.xs**2 - self.y_ints = [i*2 for i in range(11)] + self.y_ints = self.xs*2 dask_y = dask.array.from_array(np.array(self.y_ints), 2) self.dataset_hm = Dataset((self.xs, dask_y), kdims=['x'], vdims=['y']) diff --git a/tests/testimageinterfaces.py b/tests/testimageinterfaces.py index 698ef195b7..9e6924b5b5 100644 --- a/tests/testimageinterfaces.py +++ b/tests/testimageinterfaces.py @@ -2,7 +2,7 @@ from nose.plugins.attrib import attr import numpy as np -from holoviews import Dimension, Image, Curve, RGB, HSV, Dataset +from holoviews import Dimension, Image, Curve, RGB, HSV, Dataset, Table from holoviews.element.comparison import ComparisonTestCase from .testdataset import DatatypeContext @@ -118,6 +118,18 @@ def test_sample_ycoord(self): self.assertEqual(self.image.sample(y=5), Curve((xs, zs), kdims=['x'], vdims=['z'])) + def test_sample_coords(self): + arr = np.arange(10)*np.arange(5)[np.newaxis].T + xs = np.linspace(0.12, 0.81, 10) + ys = np.linspace(0.12, 0.391, 5) + img = Image((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + sampled = img.sample([(0.15, 0.15), (0.15, 0.4), (0.8, 0.4), (0.8, 0.15)]) + self.assertIsInstance(sampled, Table) + yidx = [0, 4, 4, 0] + xidx = [0, 0, 9, 9] + table = Table((xs[xidx], ys[yidx], arr[yidx, xidx]), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(sampled, table) + def test_reduce_to_scalar(self): self.assertEqual(self.image.reduce(['x', 'y'], function=np.mean), 20.25)