From 3b9b4ea06f62ec891e1a3e2be7badb998efed204 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 14 May 2017 20:57:08 +0100 Subject: [PATCH 01/20] Small fix for auto-indexing --- holoviews/core/data/array.py | 11 ++++++----- holoviews/core/data/dictionary.py | 2 +- holoviews/core/data/pandas.py | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index 69a55c2864..722abe399f 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -54,18 +54,19 @@ def init(cls, eltype, data, kdims, vdims): except: data = None + if kdims is None: + kdims = eltype.kdims + if vdims is None: + vdims = eltype.vdims + if data is None or data.ndim > 2 or data.dtype.kind in ['S', 'U', 'O']: raise ValueError("ArrayInterface interface could not handle input type.") elif data.ndim == 1: - if eltype._auto_indexable_1d: + if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1: data = np.column_stack([np.arange(len(data)), data]) else: data = np.atleast_2d(data).T - if kdims is None: - kdims = eltype.kdims - if vdims is None: - vdims = eltype.vdims return data, {'kdims':kdims, 'vdims':vdims}, {} @classmethod diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 7668f54898..812e88cc6b 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -49,7 +49,7 @@ def init(cls, eltype, data, kdims, vdims): data = {d: data[d] for d in dimensions} elif isinstance(data, np.ndarray): if data.ndim == 1: - if eltype._auto_indexable_1d: + if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1: data = np.column_stack([np.arange(len(data)), data]) else: data = np.atleast_2d(data).T diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index cee7e6e2fe..0e9e0e96d8 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -61,8 +61,8 @@ def init(cls, eltype, data, kdims, vdims): data = cyODict(((c, col) for c, col in zip(columns, column_data))) elif isinstance(data, np.ndarray): if data.ndim == 1: - if eltype._auto_indexable_1d: - data = (range(len(data)), data) + if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1: + data = (np.arange(len(data)), data) else: data = np.atleast_2d(data).T else: From c2bc41be3a1801dd44ca06df8a6e3a66a1f63bf4 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 14 May 2017 20:59:18 +0100 Subject: [PATCH 02/20] Added iloc tabular indexing interface --- holoviews/core/data/__init__.py | 10 +- holoviews/core/data/array.py | 19 +++ holoviews/core/data/dask.py | 26 +++- holoviews/core/data/dictionary.py | 25 ++++ holoviews/core/data/grid.py | 23 ++++ holoviews/core/data/interface.py | 47 ++++++++ holoviews/core/data/ndelement.py | 27 ++++- holoviews/core/data/pandas.py | 21 ++++ tests/testdataset.py | 194 ++++++++++++++++++++++++++++-- 9 files changed, 380 insertions(+), 12 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index b7baec5abe..90dd463a6e 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -9,7 +9,7 @@ import param from ..dimension import redim -from .interface import Interface +from .interface import Interface, TabularIndex from .array import ArrayInterface from .dictionary import DictInterface from .grid import GridInterface @@ -624,6 +624,14 @@ def to(self): return self._conversion_interface(self) + @property + def iloc(self): + """ + Returns a TabularIndex, providing a convenient interface to + slice and index into the Dataset using row and column indices. + """ + return TabularIndex(self) + # Aliases for pickle backward compatibility Columns = Dataset diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index 722abe399f..073258a246 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -233,4 +233,23 @@ def aggregate(cls, dataset, dimensions, function, **kwargs): return np.atleast_2d(rows) + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + if np.isscalar(cols): + if isinstance(cols, util.basestring): + cols = dataset.get_dimension_index(cols) + if np.isscalar(rows): + return dataset.data[rows, cols] + cols = [dataset.get_dimension_index(cols)] + elif not isinstance(cols, slice): + cols = [dataset.get_dimension_index(d) for d in cols] + + if np.isscalar(rows): + rows = [rows] + data = dataset.data[rows, :][:, cols] + if data.ndim == 1: + return np.atleast_2d(data).T + return data + Interface.register(ArrayInterface) diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py index 88e4ca21a9..984dbd8efc 100644 --- a/holoviews/core/data/dask.py +++ b/holoviews/core/data/dask.py @@ -12,7 +12,7 @@ from .. import util from ..element import Element -from ..ndmapping import NdMapping, item_check +from ..ndmapping import NdMapping, item_check, OrderedDict from .interface import Interface from .pandas import PandasInterface @@ -241,6 +241,30 @@ def dframe(cls, columns, dimensions): def nonzero(cls, dataset): return True + @classmethod + def iloc(cls, dataset, index): + """ + Dask does not support iloc, therefore iloc will execute + the call graph and lose the laziness of the operation. + """ + rows, cols = index + scalar = False + if isinstance(cols, slice): + cols = [d.name for d in dataset.dimensions()][cols] + elif np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols).name] + else: + cols = [dataset.get_dimension(d).name for d in index[1]] + if np.isscalar(rows): + rows = [rows] + + data = OrderedDict() + for c in cols: + data[c] = dataset.data[c].compute().iloc[rows].values + if scalar: + return data[cols[0]][0] + return tuple(data.values()) Interface.register(DaskInterface) diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 812e88cc6b..e912aa3a9b 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -261,4 +261,29 @@ def aggregate(cls, dataset, kdims, function, **kwargs): return aggregated + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + scalar = False + if np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols, strict=True)] + elif isinstance(cols, slice): + cols = dataset.dimensions()[cols] + else: + cols = [dataset.get_dimension(d, strict=True) for d in cols] + + if np.isscalar(rows): + rows = [rows] + + new_data = OrderedDict() + for d, values in dataset.data.items(): + if d in cols: + new_data[d] = values[rows] + + if scalar: + return new_data[cols[0].name][0] + return new_data + + Interface.register(DictInterface) diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 45fecb8d5b..1c42d511c0 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -391,5 +391,28 @@ def sort(cls, dataset, by=[]): raise Exception('Compressed format cannot be sorted, either instantiate ' 'in the desired order or use the expanded format.') + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + scalar = False + if np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols, strict=True)] + elif isinstance(cols, slice): + cols = dataset.dimensions()[cols] + else: + cols = [dataset.get_dimension(d, strict=True) for d in cols] + + if np.isscalar(rows): + rows = [rows] + + new_data = [] + for d in cols: + new_data.append(dataset.dimension_values(d)[rows]) + + if scalar: + return new_data[0][0] + return tuple(new_data) + Interface.register(GridInterface) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 6b7d528da3..f25fbea38a 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -6,6 +6,53 @@ from .. import util +class TabularIndex(object): + """ + TabularIndex is small wrapper object that allows row, column + based indexing into a Dataset using the ``iloc`` property. + It supports the usual numpy and pandas iloc indexing semantics + including integer indices, slices, lists and arrays of values. + """ + + def __init__(self, dataset): + self.dataset = dataset + + def __getitem__(self, index): + index = util.wrap_tuple(index) + if len(index) == 1: + index = (index[0], slice(None)) + elif len(index) > 2: + raise IndexError('Tabular index not understood, index ' + 'must be at most length 2.') + + rows, cols = index + if rows is Ellipsis: + rows = slice(None) + data = self.dataset.interface.iloc(self.dataset, (rows, cols)) + kdims = self.dataset.kdims + vdims = self.dataset.vdims + if np.isscalar(data): + return data + elif cols == slice(None): + pass + else: + if isinstance(cols, slice): + dims = self.dataset.dimensions()[index[1]] + elif np.isscalar(cols): + dims = [self.dataset.get_dimension(cols)] + else: + dims = [self.dataset.get_dimension(d) for d in cols] + kdims = [d for d in dims if d in kdims] + vdims = [d for d in dims if d in vdims] + + datatype = [dt for dt in self.dataset.datatype + if dt in Interface.interfaces and + not Interface.interfaces[dt].gridded] + if not datatype: datatype = ['dataframe', 'dictionary'] + return self.dataset.clone(data, kdims=kdims, vdims=vdims, + datatype=datatype) + + class Interface(param.Parameterized): interfaces = {} diff --git a/holoviews/core/data/ndelement.py b/holoviews/core/data/ndelement.py index 066b0dba34..dcc6e03bf4 100644 --- a/holoviews/core/data/ndelement.py +++ b/holoviews/core/data/ndelement.py @@ -8,7 +8,7 @@ from .interface import Interface from ..dimension import Dimension, Dimensioned from ..element import NdElement -from ..ndmapping import item_check +from ..ndmapping import item_check, OrderedDict from .. import util @@ -141,5 +141,30 @@ def unpack_scalar(cls, columns, data): else: return data + @classmethod + def iloc(cls, dataset, index): + data = dataset.columns() + rows, cols = index + scalar = False + if np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols, strict=True)] + elif isinstance(cols, slice): + cols = dataset.dimensions()[cols] + else: + cols = [dataset.get_dimension(d, strict=True) for d in cols] + + if np.isscalar(rows): + rows = [rows] + + new_data = OrderedDict() + for d, values in data.items(): + if d in cols: + new_data[d] = values[rows] + + if scalar: + return new_data[cols[0].name][0] + return new_data + Interface.register(NdElementInterface) diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index 0e9e0e96d8..c49d6ac01a 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -252,4 +252,25 @@ def dframe(cls, columns, dimensions): return columns.data.copy() + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + scalar = False + columns = list(dataset.data.columns) + if isinstance(cols, slice): + cols = [d.name for d in dataset.dimensions()][cols] + elif np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols).name] + else: + cols = [dataset.get_dimension(d).name for d in index[1]] + cols = [columns.index(c) for c in cols] + if np.isscalar(rows): + rows = [rows] + + if scalar: + return dataset.data.iloc[rows[0], cols[0]] + return dataset.data.iloc[rows, cols] + + Interface.register(PandasInterface) diff --git a/tests/testdataset.py b/tests/testdataset.py index 87bb4ee86b..99835b14eb 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -59,10 +59,10 @@ def setUp(self): self.data_instance_type = None def init_column_data(self): - self.xs = range(11) - self.xs_2 = [el**2 for el in self.xs] + self.xs = np.array(range(11)) + self.xs_2 = self.xs**2 - self.y_ints = [i*2 for i in range(11)] + self.y_ints = self.xs*2 self.dataset_hm = Dataset((self.xs, self.y_ints), kdims=['x'], vdims=['y']) self.dataset_hm_alias = Dataset((self.xs, self.y_ints), @@ -248,6 +248,89 @@ def test_dataset_array_ht(self): self.assertEqual(self.dataset_hm.array(), np.column_stack([self.xs, self.y_ints])) + # Tabular indexing + + def test_dataset_iloc_slice_rows(self): + sliced = self.dataset_hm.iloc[1:4] + table = Dataset({'x': self.xs[1:4], 'y': self.y_ints[1:4]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_slice_cols(self): + sliced = self.dataset_hm.iloc[1:4, 1:] + table = Dataset({'y': self.y_ints[1:4]}, kdims=[], vdims=['y'], + datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_list_cols(self): + sliced = self.dataset_hm.iloc[1:4, [0, 1]] + table = Dataset({'x': self.xs[1:4], 'y': self.y_ints[1:4]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_index_cols(self): + sliced = self.dataset_hm.iloc[1:4, 1] + table = Dataset({'y': self.y_ints[1:4]}, kdims=[], vdims=['y'], + datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows(self): + sliced = self.dataset_hm.iloc[[0, 2]] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols(self): + sliced = self.dataset_hm.iloc[[0, 2], [0, 1]] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols_by_name(self): + sliced = self.dataset_hm.iloc[[0, 2], ['x', 'y']] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_slice_cols(self): + sliced = self.dataset_hm.iloc[[0, 2], slice(0, 2)] + table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_index_rows_index_cols(self): + indexed = self.dataset_hm.iloc[1, 1] + self.assertEqual(indexed, self.y_ints[1]) + + def test_dataset_iloc_index_rows_slice_cols(self): + indexed = self.dataset_hm.iloc[1, :2] + table = Dataset({'x':self.xs[[1]], 'y':self.y_ints[[1]]}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(indexed, table) + + def test_dataset_iloc_list_cols(self): + sliced = self.dataset_hm.iloc[:, [0, 1]] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_cols_by_name(self): + sliced = self.dataset_hm.iloc[:, ['x', 'y']] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols(self): + sliced = self.dataset_hm.iloc[..., [0, 1]] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols_by_name(self): + sliced = self.dataset_hm.iloc[..., ['x', 'y']] + table = Dataset({'x':self.xs, 'y':self.y_ints}, + kdims=['x'], vdims=['y'], datatype=['dictionary']) + self.assertEqual(sliced, table) class HeterogeneousColumnTypes(HomogeneousColumnTypes): @@ -258,8 +341,8 @@ class HeterogeneousColumnTypes(HomogeneousColumnTypes): def init_column_data(self): self.kdims = ['Gender', 'Age'] self.vdims = ['Weight', 'Height'] - self.gender, self.age = ['M','M','F'], [10,16,12] - self.weight, self.height = [15,18,10], [0.8,0.6,0.8] + self.gender, self.age = np.array(['M','M','F']), np.array([10,16,12]) + self.weight, self.height = np.array([15,18,10]), np.array([0.8,0.6,0.8]) self.table = Dataset({'Gender':self.gender, 'Age':self.age, 'Weight':self.weight, 'Height':self.height}, kdims=self.kdims, vdims=self.vdims) @@ -551,6 +634,91 @@ def test_dataset_value_dim_scalar_index(self): row = self.table['M', 10, 'Weight'] self.assertEquals(row, 15) + # Tabular indexing + + def test_dataset_iloc_slice_rows(self): + sliced = self.table.iloc[1:2] + table = Dataset({'Gender':self.gender[1:2], 'Age':self.age[1:2], + 'Weight':self.weight[1:2], 'Height':self.height[1:2]}, + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_slice_cols(self): + sliced = self.table.iloc[1:2, 1:3] + table = Dataset({'Age':self.age[1:2], 'Weight':self.weight[1:2]}, + kdims=self.kdims[1:], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_list_cols(self): + sliced = self.table.iloc[1:2, [1, 3]] + table = Dataset({'Age':self.age[1:2], 'Height':self.height[1:2]}, + kdims=self.kdims[1:], vdims=self.vdims[1:]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_slice_rows_index_cols(self): + sliced = self.table.iloc[1:2, 2] + table = Dataset({'Weight':self.weight[1:2]}, kdims=[], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows(self): + sliced = self.table.iloc[[0, 2]] + table = Dataset({'Gender':self.gender[[0, 2]], 'Age':self.age[[0, 2]], + 'Weight':self.weight[[0, 2]], 'Height':self.height[[0, 2]]}, + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols(self): + sliced = self.table.iloc[[0, 2], [0, 2]] + table = Dataset({'Gender':self.gender[[0, 2]], 'Weight':self.weight[[0, 2]]}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_list_cols_by_name(self): + sliced = self.table.iloc[[0, 2], ['Gender', 'Weight']] + table = Dataset({'Gender':self.gender[[0, 2]], 'Weight':self.weight[[0, 2]]}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_rows_slice_cols(self): + sliced = self.table.iloc[[0, 2], slice(1, 3)] + table = Dataset({'Age':self.age[[0, 2]], 'Weight':self.weight[[0, 2]]}, + kdims=self.kdims[1:], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_index_rows_index_cols(self): + indexed = self.table.iloc[1, 1] + self.assertEqual(indexed, self.age[1]) + + def test_dataset_iloc_index_rows_slice_cols(self): + indexed = self.table.iloc[1, 1:3] + table = Dataset({'Age':self.age[[1]], 'Weight':self.weight[[1]]}, + kdims=self.kdims[1:], vdims=self.vdims[:1]) + self.assertEqual(indexed, table) + + def test_dataset_iloc_list_cols(self): + sliced = self.table.iloc[:, [0, 2]] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_list_cols_by_name(self): + sliced = self.table.iloc[:, ['Gender', 'Weight']] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols(self): + sliced = self.table.iloc[..., [0, 2]] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + + def test_dataset_iloc_ellipsis_list_cols_by_name(self): + sliced = self.table.iloc[..., ['Gender', 'Weight']] + table = Dataset({'Gender':self.gender, 'Weight':self.weight}, + kdims=self.kdims[:1], vdims=self.vdims[:1]) + self.assertEqual(sliced, table) + # Casting def test_dataset_array_ht(self): @@ -562,6 +730,9 @@ class ArrayDatasetTest(HomogeneousColumnTypes, ComparisonTestCase): """ Test of the ArrayDataset interface. """ + + datatype = 'array' + def setUp(self): self.restore_datatype = Dataset.datatype Dataset.datatype = ['array'] @@ -574,6 +745,8 @@ class DFDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase): Test of the pandas DFDataset interface. """ + datatype = 'dataframe' + def setUp(self): if pd is None: raise SkipTest("Pandas not available") @@ -588,6 +761,8 @@ class DaskDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase): Test of the pandas DaskDataset interface. """ + datatype = 'dask' + def setUp(self): if dd is None: raise SkipTest("dask not available") @@ -630,6 +805,8 @@ class DictDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase): Test of the generic dictionary interface. """ + datatype = 'dictionary' + def setUp(self): self.restore_datatype = Dataset.datatype Dataset.datatype = ['dictionary'] @@ -698,10 +875,9 @@ def setUp(self): self.init_data() def init_column_data(self): - self.xs = range(11) - self.xs_2 = [el**2 for el in self.xs] - - self.y_ints = [i*2 for i in range(11)] + self.xs = np.arange(11) + self.xs_2 = self.xs**2 + self.y_ints = self.xs*2 self.dataset_hm = Dataset((self.xs, self.y_ints), kdims=['x'], vdims=['y']) self.dataset_hm_alias = Dataset((self.xs, self.y_ints), From 48d7a06ce6eef17b85eae95777f42bbf83d3fb8e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 12:21:31 +0100 Subject: [PATCH 03/20] Small docstring improvements --- holoviews/core/data/__init__.py | 4 +++- holoviews/core/data/interface.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 90dd463a6e..f82c2e9a64 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -628,7 +628,9 @@ def to(self): def iloc(self): """ Returns a TabularIndex, providing a convenient interface to - slice and index into the Dataset using row and column indices. + slice and index into the Dataset using row and column indices, + allow selection by integer index, slice and list of integer + indices and boolean arrays. """ return TabularIndex(self) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index f25fbea38a..9a0d888a6c 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -9,7 +9,7 @@ class TabularIndex(object): """ TabularIndex is small wrapper object that allows row, column - based indexing into a Dataset using the ``iloc`` property. + based indexing into a Dataset using the ``.iloc`` property. It supports the usual numpy and pandas iloc indexing semantics including integer indices, slices, lists and arrays of values. """ From ddff78553cac14c897ca5edcdb62b58a3c5f13ae Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 12:25:20 +0100 Subject: [PATCH 04/20] Updated Point selection example to use .iloc --- examples/streams/bokeh/point_selection1D.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/streams/bokeh/point_selection1D.ipynb b/examples/streams/bokeh/point_selection1D.ipynb index 33f092775f..1e1b84e332 100644 --- a/examples/streams/bokeh/point_selection1D.ipynb +++ b/examples/streams/bokeh/point_selection1D.ipynb @@ -42,12 +42,12 @@ "\n", "# Write function that uses the selection indices to slice points and compute stats\n", "def selected_info(index):\n", - " arr = points.array()[index]\n", + " selected = points.iloc[index]\n", " if index:\n", - " label = 'Mean x, y: %.3f, %.3f' % tuple(arr.mean(axis=0))\n", + " label = 'Mean x, y: %.3f, %.3f' % tuple(selected.array().mean(axis=0))\n", " else:\n", " label = 'No selection'\n", - " return points.clone(arr, label=label)(style=dict(color='red'))\n", + " return selected.relabel(label)(style=dict(color='red'))\n", "\n", "# Combine points and DynamicMap\n", "points + hv.DynamicMap(selected_info, streams=[selection])" From 0fcf16156b234cac40227a4f9d454a3b9121d2e6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 12:32:02 +0100 Subject: [PATCH 05/20] Renamed TabularIndex object to iloc --- holoviews/core/data/__init__.py | 6 +++--- holoviews/core/data/interface.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index f82c2e9a64..3f1a1bb718 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -9,7 +9,7 @@ import param from ..dimension import redim -from .interface import Interface, TabularIndex +from .interface import Interface, iloc from .array import ArrayInterface from .dictionary import DictInterface from .grid import GridInterface @@ -627,12 +627,12 @@ def to(self): @property def iloc(self): """ - Returns a TabularIndex, providing a convenient interface to + Returns an iloc object, providing a convenient interface to slice and index into the Dataset using row and column indices, allow selection by integer index, slice and list of integer indices and boolean arrays. """ - return TabularIndex(self) + return iloc(self) # Aliases for pickle backward compatibility diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 9a0d888a6c..645d96d48d 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -6,12 +6,12 @@ from .. import util -class TabularIndex(object): +class iloc(object): """ - TabularIndex is small wrapper object that allows row, column - based indexing into a Dataset using the ``.iloc`` property. - It supports the usual numpy and pandas iloc indexing semantics - including integer indices, slices, lists and arrays of values. + iloc is small wrapper object that allows row, column based + indexing into a Dataset using the ``.iloc`` property. It supports + the usual numpy and pandas iloc indexing semantics including + integer indices, slices, lists and arrays of values. """ def __init__(self, dataset): From c93995c9767aee262090745c3a4e034c1d168e35 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 14:27:04 +0100 Subject: [PATCH 06/20] Added ndloc indexing interface --- holoviews/core/data/__init__.py | 7 ++++- holoviews/core/data/grid.py | 49 +++++++++++++++++++++++++++++++- holoviews/core/data/image.py | 7 +++++ holoviews/core/data/interface.py | 21 ++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 3f1a1bb718..f5be2c39bc 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -9,7 +9,7 @@ import param from ..dimension import redim -from .interface import Interface, iloc +from .interface import Interface, iloc, ndloc from .array import ArrayInterface from .dictionary import DictInterface from .grid import GridInterface @@ -635,6 +635,11 @@ def iloc(self): return iloc(self) + @property + def ndloc(self): + return ndloc(self) + + # Aliases for pickle backward compatibility Columns = Dataset ArrayColumns = ArrayInterface diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 1c42d511c0..a66949aaa8 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -1,4 +1,4 @@ -from collections import OrderedDict, defaultdict +from collections import OrderedDict, defaultdict, Iterable try: import itertools.izip as zip @@ -167,6 +167,53 @@ def canonicalize(cls, dataset, data, coord_dims=None): return data + @classmethod + def invert_index(cls, index, length): + if np.isscalar(index): + return length - index + elif isinstance(index, slice): + start, stop = index.start, index.stop + new_start, new_stop = None, None + if start is not None: + new_stop = length - start + if stop is not None: + new_start = length - stop + return slice(new_start-1, new_stop-1) + elif isinstance(index, Iterable): + new_index = [] + for ind in index: + new_index.append(length-ind) + return new_index + + + @classmethod + def ndloc(cls, dataset, indices): + selected = {} + adjusted_inds = [] + all_scalar = True + for kd, ind in zip(dataset.kdims[::-1], indices): + coords = cls.coords(dataset, kd.name) + if np.all(coords[1:] < coords[:-1]): + ind = cls.invert_index(ind, len(coords)) + if np.isscalar(ind): + ind = [ind] + else: + all_scalar = False + selected[kd.name] = coords[ind] + adjusted_inds.append(ind) + for kd in dataset.kdims: + if kd.name not in selected: + coords = cls.coords(dataset, kd.name) + selected[kd.name] = coords + all_scalar = False + for vd in dataset.vdims: + arr = dataset.dimension_values(vd, flat=False) + if all_scalar and len(dataset.vdims) == 1: + return arr[tuple(ind[0] for ind in adjusted_inds)] + selected[vd.name] = arr[tuple(adjusted_inds)] + return tuple(selected[d.name] for d in dataset.dimensions()) + + @classmethod def values(cls, dataset, dim, expanded=True, flat=True): dim = dataset.get_dimension(dim, strict=True) diff --git a/holoviews/core/data/image.py b/holoviews/core/data/image.py index 81a8bbb837..70ac560ffd 100644 --- a/holoviews/core/data/image.py +++ b/holoviews/core/data/image.py @@ -89,6 +89,13 @@ def reindex(cls, dataset, kdims=None, vdims=None): return data[..., inds] if len(inds) > 1 else data[..., inds[0]] return data + @classmethod + def coords(cls, dataset, dim, ordered=False, expanded=False): + dim = dataset.get_dimension(dim, strict=True) + if expanded: + return util.expand_grid_coords(dataset, dim) + return cls.values(dataset, dim, expanded=False) + @classmethod def range(cls, obj, dim): dim_idx = obj.get_dimension_index(dim) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 645d96d48d..6c0a49ae14 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -53,6 +53,27 @@ def __getitem__(self, index): datatype=datatype) +class ndloc(object): + + def __init__(self, dataset): + self.dataset = dataset + + def __getitem__(self, indices): + ds = self.dataset + indices = util.wrap_tuple(indices) + if not ds.interface.gridded: + raise IndexError('Cannot use ndloc on non nd-dimensional datastructure') + selected = self.dataset.interface.ndloc(ds, indices) + if np.isscalar(selected): + return selected + datatype = [dt for dt in ds.datatype if dt in Interface.interfaces and + Interface.interfaces[dt].gridded] + params = {} + if hasattr(ds, 'bounds'): + params['bounds'] = None + return self.dataset.clone(selected, datatype=[ds.interface.datatype]+datatype, **params) + + class Interface(param.Parameterized): interfaces = {} From 0d2a9c14c4632905b6bd05027dcfb2d0badf49c0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 16:48:09 +0100 Subject: [PATCH 07/20] Implemented Image indexing using ndloc --- holoviews/core/util.py | 2 +- holoviews/element/raster.py | 28 ++++++++++++++++++---------- tests/testdataset.py | 6 +++--- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index e8770dd233..b93f89e1da 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -1449,7 +1449,7 @@ def bound_range(vals, density): """ low, high = vals.min(), vals.max() invert = False - if vals[0] > vals[1]: + if len(vals) > 1 and vals[0] > vals[1]: invert = True if not density: density = round(1./((high-low)/(len(vals)-1)), sys.float_info.dig) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index d629a3fc57..d4c3e58fab 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -7,7 +7,7 @@ from ..core.data import ImageInterface from ..core import Dimension, Element2D, Overlay, Dataset from ..core.boundingregion import BoundingRegion, BoundingBox -from ..core.sheetcoords import SheetCoordinateSystem +from ..core.sheetcoords import SheetCoordinateSystem, Slice from ..core.util import max_range from .chart import Curve from .tabular import Table @@ -298,21 +298,29 @@ def select(self, selection_specs=None, **selection): coords = tuple(selection[kd.name] if kd.name in selection else slice(None) for kd in self.kdims) + shape = self.interface.shape(self, gridded=True) if any([isinstance(el, slice) for el in coords]): - shape = self.interface.shape(self, gridded=True) bounds = compute_slice_bounds(coords, self, shape[:2]) xdim, ydim = self.kdims l, b, r, t = bounds.lbrt() - selection = {xdim.name: slice(l, r), ydim.name: slice(b, t)} - else: - selection = {kd.name: c for kd, c in zip(self.kdims, self.closest(coords))} - data = self.interface.select(self, **selection) - if isinstance(data, np.ndarray) and data.ndim == 1: - return self.clone([tuple(data)], kdims=[], new_type=Dataset) - elif np.isscalar(data): - return data + # Situate resampled region into overall slice + y0, y1, x0, x1 = Slice(bounds, self) + y0, y1 = shape[0]-y1, shape[0]-y0 + selection = (slice(y0, y1), slice(x0, x1)) + sliced = True + else: + y, x = self.sheet2matrixidx(coords[0], coords[1]) + y = shape[0]-y-1 + selection = (y, x) + sliced = False + + data = self.interface.ndloc(self, selection) + if not sliced: + if np.isscalar(data): + return data + return self.clone(data[self.ndims:], kdims=[], new_type=Dataset) else: return self.clone(data, xdensity=self.xdensity, ydensity=self.ydensity, bounds=bounds) diff --git a/tests/testdataset.py b/tests/testdataset.py index 99835b14eb..4d7d10345b 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -884,9 +884,9 @@ def init_column_data(self): kdims=[('x', 'X')], vdims=[('y', 'Y')]) def init_grid_data(self): - self.grid_xs = [0, 1] - self.grid_ys = [0.1, 0.2, 0.3] - self.grid_zs = [[0, 1], [2, 3], [4, 5]] + self.grid_xs = np.array([0, 1]) + self.grid_ys = np.array([0.1, 0.2, 0.3]) + self.grid_zs = np.array([[0, 1], [2, 3], [4, 5]]) self.dataset_grid = self.eltype((self.grid_xs, self.grid_ys, self.grid_zs), kdims=['x', 'y'], vdims=['z']) From 969c06cc017614844d015729729e0c9c0876512d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 17:28:18 +0100 Subject: [PATCH 08/20] Implemented Image.sample on top of ndloc interface --- holoviews/core/data/__init__.py | 4 ++- holoviews/element/raster.py | 46 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index f5be2c39bc..8966efb682 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -433,7 +433,9 @@ def sample(self, samples=[], closest=True, **kwargs): else: selection = tuple(selection.columns(kdims+self.vdims).values()) - return self.clone(selection, kdims=kdims, new_type=new_type) + datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict'])) + return self.clone(selection, kdims=kdims, new_type=new_type, + datatype=datatype) lens = set(len(util.wrap_tuple(s)) for s in samples) if len(lens) > 1: diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index d4c3e58fab..93a4d6fd03 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -326,6 +326,52 @@ def select(self, selection_specs=None, **selection): ydensity=self.ydensity, bounds=bounds) + def sample(self, samples=[], **kwargs): + """ + Allows sampling of an Image as an iterator of coordinates + matching the key dimensions, returning a new object containing + just the selected samples. Alternatively may supply kwargs to + sample a coordinate on an object. On an Image the coordinates + are continuously indexed and will always snap to the nearest + coordinate. + """ + if kwargs and samples: + raise Exception('Supply explicit list of samples or kwargs, not both.') + elif kwargs: + sample = [slice(None) for _ in range(self.ndims)] + for dim, val in kwargs.items(): + sample[self.get_dimension_index(dim)] = val + samples = [tuple(sample)] + + # If a 1D cross-section of 2D space return Curve + if len(samples) == 1: + dims = [kd for kd, v in zip(self.kdims, samples[0]) if not np.isscalar(v)] + if len(dims) == 1: + kdims = [self.get_dimension(kd) for kd in dims] + sel = {kd.name: s for kd, s in zip(self.kdims, samples[0])} + dims = [kd for kd, v in sel.items() if not np.isscalar(v)] + selection = self.select(**sel) + selection = tuple(selection.columns(kdims+self.vdims).values()) + datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict'])) + return self.clone(selection, kdims=kdims, new_type=Curve, + datatype=datatype) + else: + new_type = Table + kdims = self.kdims + else: + new_type = Dataset + kdims = self.kdims + + xs, ys = [], [] + for s in samples: + if len(s) > 1: + y, x = self.sheet2matrixidx(*s) + xs.append(x) + ys.append(y) + data = self.interface.ndloc(self, (ys, xs)) + return self.clone(data, new_type=Dataset, datatype=['dataframe', 'dict']) + + def closest(self, coords=[], **kwargs): """ Given a single coordinate or multiple coordinates as From 2b727850415ff4254698726732e6086cd116e3e8 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 18:55:55 +0100 Subject: [PATCH 09/20] Fixed bug in Dataset unit test setup --- tests/testdataset.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/testdataset.py b/tests/testdataset.py index 4d7d10345b..4822b84709 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -872,7 +872,8 @@ def setUp(self): self.restore_datatype = Dataset.datatype Dataset.datatype = ['grid'] self.data_instance_type = dict - self.init_data() + self.init_column_data() + self.init_grid_data() def init_column_data(self): self.xs = np.arange(11) @@ -1308,10 +1309,10 @@ class XArrayDaskArrayDatasetTest(XArrayDatasetTest): def init_column_data(self): import dask.array - self.xs = range(11) - self.xs_2 = [el**2 for el in self.xs] + self.xs = np.array(range(11)) + self.xs_2 = self.xs**2 - self.y_ints = [i*2 for i in range(11)] + self.y_ints = self.xs*2 dask_y = dask.array.from_array(np.array(self.y_ints), 2) self.dataset_hm = Dataset((self.xs, dask_y), kdims=['x'], vdims=['y']) From 9638fb892abfc615ed2d1d5edc40973330f305cb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 19:01:54 +0100 Subject: [PATCH 10/20] Fixed closest bug in Image.sample --- holoviews/element/raster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 93a4d6fd03..adbfd7cf32 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -335,6 +335,7 @@ def sample(self, samples=[], **kwargs): are continuously indexed and will always snap to the nearest coordinate. """ + kwargs = {k: v for k, v in kwargs.items() if k != 'closest'} if kwargs and samples: raise Exception('Supply explicit list of samples or kwargs, not both.') elif kwargs: From 0da749b76618117fa9d03d23a00adb9a628ef75f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 20:05:16 +0100 Subject: [PATCH 11/20] Fixed Image.sample y-coord index --- holoviews/element/raster.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index adbfd7cf32..fa7e241179 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -345,6 +345,7 @@ def sample(self, samples=[], **kwargs): samples = [tuple(sample)] # If a 1D cross-section of 2D space return Curve + shape = self.interface.shape(self, gridded=True) if len(samples) == 1: dims = [kd for kd, v in zip(self.kdims, samples[0]) if not np.isscalar(v)] if len(dims) == 1: @@ -368,9 +369,9 @@ def sample(self, samples=[], **kwargs): if len(s) > 1: y, x = self.sheet2matrixidx(*s) xs.append(x) - ys.append(y) + ys.append(shape[0]-y) data = self.interface.ndloc(self, (ys, xs)) - return self.clone(data, new_type=Dataset, datatype=['dataframe', 'dict']) + return self.clone(data, new_type=Table, datatype=['dataframe', 'dict']) def closest(self, coords=[], **kwargs): From 4af6c8c098db0b0e3db81249c3f4b95fae26002a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 21:27:13 +0100 Subject: [PATCH 12/20] Minor fixes for sampling --- doc/Tutorials/Introduction.ipynb | 2 +- holoviews/element/raster.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/Tutorials/Introduction.ipynb b/doc/Tutorials/Introduction.ipynb index 237b10a1a9..38e14b72cc 100644 --- a/doc/Tutorials/Introduction.ipynb +++ b/doc/Tutorials/Introduction.ipynb @@ -550,7 +550,7 @@ "source": [ "print(rgb_parrot)\n", "print(rgb_parrot[0,0])\n", - "print(rgb_parrot[0,0][0])" + "print(rgb_parrot[0,0].iloc[0, 0])" ] }, { diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index fa7e241179..af805d9fcc 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -369,7 +369,7 @@ def sample(self, samples=[], **kwargs): if len(s) > 1: y, x = self.sheet2matrixidx(*s) xs.append(x) - ys.append(shape[0]-y) + ys.append(shape[0]-y-1) data = self.interface.ndloc(self, (ys, xs)) return self.clone(data, new_type=Table, datatype=['dataframe', 'dict']) From 3f5ba0509b3a7d6405d3d5ce5831b32df59be7d0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 22:17:05 +0100 Subject: [PATCH 13/20] Added Image sampling test --- tests/testimageinterfaces.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/testimageinterfaces.py b/tests/testimageinterfaces.py index 698ef195b7..9e6924b5b5 100644 --- a/tests/testimageinterfaces.py +++ b/tests/testimageinterfaces.py @@ -2,7 +2,7 @@ from nose.plugins.attrib import attr import numpy as np -from holoviews import Dimension, Image, Curve, RGB, HSV, Dataset +from holoviews import Dimension, Image, Curve, RGB, HSV, Dataset, Table from holoviews.element.comparison import ComparisonTestCase from .testdataset import DatatypeContext @@ -118,6 +118,18 @@ def test_sample_ycoord(self): self.assertEqual(self.image.sample(y=5), Curve((xs, zs), kdims=['x'], vdims=['z'])) + def test_sample_coords(self): + arr = np.arange(10)*np.arange(5)[np.newaxis].T + xs = np.linspace(0.12, 0.81, 10) + ys = np.linspace(0.12, 0.391, 5) + img = Image((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + sampled = img.sample([(0.15, 0.15), (0.15, 0.4), (0.8, 0.4), (0.8, 0.15)]) + self.assertIsInstance(sampled, Table) + yidx = [0, 4, 4, 0] + xidx = [0, 0, 9, 9] + table = Table((xs[xidx], ys[yidx], arr[yidx, xidx]), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(sampled, table) + def test_reduce_to_scalar(self): self.assertEqual(self.image.reduce(['x', 'y'], function=np.mean), 20.25) From 788cea806401e5cb20b3685b38caa479072c4d98 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 23:00:47 +0100 Subject: [PATCH 14/20] Small fix for ndloc --- holoviews/core/data/interface.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 6c0a49ae14..c34c828cac 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -66,12 +66,10 @@ def __getitem__(self, indices): selected = self.dataset.interface.ndloc(ds, indices) if np.isscalar(selected): return selected - datatype = [dt for dt in ds.datatype if dt in Interface.interfaces and - Interface.interfaces[dt].gridded] params = {} if hasattr(ds, 'bounds'): params['bounds'] = None - return self.dataset.clone(selected, datatype=[ds.interface.datatype]+datatype, **params) + return self.dataset.clone(selected, datatype=[ds.interface.datatype]+ds.datatype, **params) class Interface(param.Parameterized): From bdf2ad310505593507d0c941a548d79a900b28ca Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 23:01:24 +0100 Subject: [PATCH 15/20] Vectorized Image.sample --- holoviews/element/raster.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index af805d9fcc..cb07d6ebbd 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -364,13 +364,10 @@ def sample(self, samples=[], **kwargs): new_type = Dataset kdims = self.kdims - xs, ys = [], [] - for s in samples: - if len(s) > 1: - y, x = self.sheet2matrixidx(*s) - xs.append(x) - ys.append(shape[0]-y-1) - data = self.interface.ndloc(self, (ys, xs)) + xs, ys = zip(*samples) + yidx, xidx = self.sheet2matrixidx(np.array(xs), np.array(ys)) + yidx = shape[0]-yidx-1 + data = self.interface.ndloc(self, (yidx, xidx)) return self.clone(data, new_type=Table, datatype=['dataframe', 'dict']) From 6571609d4fc2f19a046fff5cfa22b78c8fc5c79d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 23:01:52 +0100 Subject: [PATCH 16/20] Added ndloc unit tests --- tests/testdataset.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/testdataset.py b/tests/testdataset.py index 4822b84709..56af76ef4a 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -911,6 +911,44 @@ def test_canonical_vdim(self): self.assertEqual(dataset.dimension_values('z', flat=False), canonical) + def test_dataset_ndloc_index(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + self.assertEqual(ds.ndloc[0,0], arr[0, 0]) + + def test_dataset_ndloc_index2(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + self.assertEqual(ds.ndloc[4, 9], arr[4, 9]) + + def test_dataset_ndloc_slice(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + sliced = Dataset((xs[2:5], ys[1:], arr[1:, 2:5]), kdims=['x', 'y'], vdims=['z'], + datatype=[self.datatype]) + self.assertEqual(ds.ndloc[1:, 2:5], sliced) + + def test_dataset_ndloc_lists(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype, 'dictionary']) + sliced = Dataset((xs[[1, 2, 3]], ys[[0, 1, 2]], arr[[0, 1, 2], [1, 2, 3]]), kdims=['x', 'y'], vdims=['z'], + datatype=['dictionary']) + self.assertEqual(ds.ndloc[[0, 1, 2], [1, 2, 3]], sliced) + + def test_dataset_ndloc_slice_two_vdims(self): + xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5) + arr = np.arange(10)*np.arange(5)[np.newaxis].T + arr2 = (np.arange(10)*np.arange(5)[np.newaxis].T)[::-1] + ds = Dataset((xs, ys, arr, arr2), kdims=['x', 'y'], vdims=['z', 'z2'], datatype=[self.datatype, 'dictionary']) + sliced = Dataset((xs[[1, 2, 3]], ys[[0, 1, 2]], arr[[0, 1, 2], [1, 2, 3]], + arr2[[0, 1, 2], [1, 2, 3]]), kdims=['x', 'y'], vdims=['z', 'z2'], + datatype=['dictionary']) + self.assertEqual(ds.ndloc[[0, 1, 2], [1, 2, 3]], sliced) + def test_dataset_dim_vals_grid_kdims_xs(self): self.assertEqual(self.dataset_grid.dimension_values(0, expanded=False), np.array([0, 1])) @@ -1248,6 +1286,9 @@ def test_dataset_groupby_drop_dims_with_vdim(self): def test_dataset_groupby_drop_dims_dynamic_with_vdim(self): raise SkipTest("Not supported") + def test_dataset_ndloc_slice_two_vdims(self): + raise SkipTest("Not supported") + @attr(optional=1) class XArrayDatasetTest(GridDatasetTest): From 8607c20bd83f7f1da0afa579c775f2a94139a792 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 23:27:51 +0100 Subject: [PATCH 17/20] Simplified decimate operation using iloc --- holoviews/operation/element.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index 7530791dfe..47bc2d00ae 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -597,17 +597,7 @@ def _process_layer(self, element, key=None): if len(sliced) > self.p.max_samples: prng = np.random.RandomState(self.p.random_seed) - length = len(sliced) - if element.interface is PandasInterface: - data = sliced.data.sample(self.p.max_samples, - random_state=prng) - else: - inds = prng.choice(length, self.p.max_samples, False) - if isinstance(element.interface, DictInterface): - data = {k: v[inds] for k, v in sliced.data.items()} - else: - data = sliced.data[inds, :] - sliced = element.clone(data) + return element.iloc[prng.choice(len(sliced), self.p.max_samples, False)] return sliced def _process(self, element, key=None): From 922accd701e5b122f6a97c5ddd96f646f022d4cb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 18 Jun 2017 23:37:00 +0100 Subject: [PATCH 18/20] Use iloc in Tabular.pprint_cell --- holoviews/core/element.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 55bfd64f52..1e424c5375 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -228,8 +228,7 @@ def pprint_cell(self, row, col): return self.kdims[col].pprint_label else: dim = self.get_dimension(col) - values = self[dim.name] - return dim.pprint_value(values[row-1]) + return dim.pprint_value(self.iloc[row-1, col]) def cell_type(self, row, col): From 5a1d93062dd87c21ef57eaefd77968a056a6516c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 19 Jun 2017 00:34:08 +0100 Subject: [PATCH 19/20] Improved iloc and ndloc docstrings --- holoviews/core/data/__init__.py | 46 +++++++++++++++++++++++++++++--- holoviews/core/data/interface.py | 10 ++++++- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 8966efb682..d8d2836483 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -629,16 +629,54 @@ def to(self): @property def iloc(self): """ - Returns an iloc object, providing a convenient interface to - slice and index into the Dataset using row and column indices, - allow selection by integer index, slice and list of integer - indices and boolean arrays. + Returns an iloc object providing a convenient interface to + slice and index into the Dataset using row and column indices. + Allow selection by integer index, slice and list of integer + indices and boolean arrays, e.g.: + + Examples: + + * Index the first row and column: + + dataset.iloc[0, 0] + + * Select rows 1 and 2 with a slice: + + dataset.iloc[1:3, :] + + * Select with a list of integer coordinates: + + dataset.iloc[[0, 2, 3]] """ return iloc(self) @property def ndloc(self): + """ + Returns an ndloc object providing nd-array like indexing for + gridded datasets. Follows NumPy array indexing conventions, + allowing for indexing, slicing and selecting a list of indices + on multi-dimensional arrays using integer indices. The order + of array indices is inverted Dataset key dimensions, e.g. an + Image with key dimensions 'x' and 'y' can be indexed with + ``image.ndloc[iy, ix]``, where ``iy`` and ``ix`` are integer + indices along the y and x dimensions. + + Examples: + + * Index value in 2D array: + + dataset.ndloc[3, 1] + + * Slice along y-axis of 2D array: + + dataset.ndloc[2:5, :] + + * Select with integer coordinates along x- and y-axes: + + dataset.ndloc[[1, 2, 3], [0, 2, 3]] + """ return ndloc(self) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index c34c828cac..8694e95260 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -11,7 +11,8 @@ class iloc(object): iloc is small wrapper object that allows row, column based indexing into a Dataset using the ``.iloc`` property. It supports the usual numpy and pandas iloc indexing semantics including - integer indices, slices, lists and arrays of values. + integer indices, slices, lists and arrays of values. For more + information see the ``Dataset.iloc`` property docstring. """ def __init__(self, dataset): @@ -54,6 +55,13 @@ def __getitem__(self, index): class ndloc(object): + """ + ndloc is a small wrapper object that allows ndarray-like indexing + for gridded Datasets using the ``.ndloc`` property. It supports + the standard NumPy ndarray indexing semantics including + integer indices, slices, lists and arrays of values. For more + information see the ``Dataset.ndloc`` property docstring. + """ def __init__(self, dataset): self.dataset = dataset From 3ecff25bc06acc8c1487e364204f22dd90257607 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 19 Jun 2017 02:13:36 +0100 Subject: [PATCH 20/20] Small docstring fixes for iloc and ndloc --- holoviews/core/data/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index d8d2836483..4944bb4780 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -632,7 +632,7 @@ def iloc(self): Returns an iloc object providing a convenient interface to slice and index into the Dataset using row and column indices. Allow selection by integer index, slice and list of integer - indices and boolean arrays, e.g.: + indices and boolean arrays. Examples: @@ -658,10 +658,10 @@ def ndloc(self): gridded datasets. Follows NumPy array indexing conventions, allowing for indexing, slicing and selecting a list of indices on multi-dimensional arrays using integer indices. The order - of array indices is inverted Dataset key dimensions, e.g. an - Image with key dimensions 'x' and 'y' can be indexed with - ``image.ndloc[iy, ix]``, where ``iy`` and ``ix`` are integer - indices along the y and x dimensions. + of array indices is inverted relative to the Dataset key + dimensions, e.g. an Image with key dimensions 'x' and 'y' can + be indexed with ``image.ndloc[iy, ix]``, where ``iy`` and + ``ix`` are integer indices along the y and x dimensions. Examples: @@ -673,7 +673,7 @@ def ndloc(self): dataset.ndloc[2:5, :] - * Select with integer coordinates along x- and y-axes: + * Vectorized (non-orthogonal) indexing along x- and y-axes: dataset.ndloc[[1, 2, 3], [0, 2, 3]] """