From b32522a7589e11c83f5d7117b03ac9a14ff949ca Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 17 Sep 2019 18:20:38 -0400 Subject: [PATCH 01/23] Add pipeline dataset property to track data lineage --- holoviews/core/data/__init__.py | 136 ++++- holoviews/core/data/interface.py | 60 +- holoviews/core/data/multipath.py | 7 +- holoviews/core/dimension.py | 37 -- holoviews/core/operation.py | 8 +- holoviews/element/chart.py | 54 -- holoviews/operation/element.py | 17 +- holoviews/operation/timeseries.py | 6 +- holoviews/tests/core/testdatasetproperty.py | 605 +++++++++++++++++++- holoviews/tests/operation/testoperation.py | 40 -- 10 files changed, 752 insertions(+), 218 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index d0a5fc6ab8..0b4dda011b 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -5,16 +5,19 @@ except ImportError: pass +import types +import copy import numpy as np import param +from param.parameterized import add_metaclass, ParameterizedMetaclass from .. import util from ..accessors import Redim from ..dimension import Dimension, process_dimensions from ..element import Element -from ..ndmapping import OrderedDict +from ..ndmapping import OrderedDict, MultiDimensionalMapping from ..spaces import HoloMap, DynamicMap -from .interface import Interface, iloc, ndloc, DataError +from .interface import Interface, iloc, ndloc from .array import ArrayInterface from .dictionary import DictInterface from .grid import GridInterface @@ -155,6 +158,7 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None, if len(kdims) == selected.ndims or not groupby: # Propagate dataset params['dataset'] = self._element.dataset + params['pipeline'] = self._element._pipeline element = new_type(selected, **params) return element.sort() if sort else element group = selected.groupby(groupby, container_type=HoloMap, @@ -165,7 +169,52 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None, return group +class PipelineMeta(ParameterizedMetaclass): + # Public methods that should not be wrapped + blacklist = ['__init__', 'clone', 'execute_pipeline'] + + def __new__(cls, classname, bases, classdict): + + for method_name in classdict: + method_fn = classdict[method_name] + if method_name in cls.blacklist or method_name.startswith('_'): + continue + elif isinstance(method_fn, types.FunctionType): + classdict[method_name] = cls.pipelined(method_fn) + + inst = type.__new__(cls, classname, bases, classdict) + inst._in_method = False + return inst + + @staticmethod + def pipelined(method): + def pipelined_fn(*a, **k): + inst = a[0] + in_method = inst._in_method + if not in_method: + inst._in_method = True + + result = method(*a, **k) + + if not in_method: + if isinstance(result, Dataset): + result._pipeline = inst._pipeline + [ + (method, list(a[1:]), k) + ] + elif isinstance(result, MultiDimensionalMapping): + for key, element in result.items(): + element._pipeline = inst._pipeline + [ + (method, list(a[1:]), k), + (getattr(type(result), '__getitem__'), [key], {}) + ] + inst._in_method = False + return result + + return pipelined_fn + + +@add_metaclass(PipelineMeta) class Dataset(Element): """ Dataset provides a general baseclass for Element types that @@ -201,6 +250,8 @@ class Dataset(Element): _kdim_reductions = {} def __init__(self, data, kdims=None, vdims=None, **kwargs): + input_dataset = kwargs.pop('dataset', None) + input_pipeline = kwargs.pop('pipeline', []) if isinstance(data, Element): pvals = 
util.get_param_values(data)
             kwargs.update([(l, pvals[l]) for l in ['group', 'label']
@@ -217,6 +268,65 @@ def __init__(self, data, kdims=None, vdims=None, **kwargs):
 
         self.redim = Redim(self, mode='dataset')
 
+        # Handle _pipeline property
+        self._pipeline = input_pipeline + [(
+            type(self),
+            [],
+            kwargs,  # includes kdims and vdims
+        )]
+
+        # Handle initializing the dataset property.
+        self._dataset = None
+        if input_dataset is not None:
+            self._dataset = input_dataset.clone(dataset=None, pipeline=[])
+
+        elif type(self) is Dataset:
+            self._dataset = self
+
+    @property
+    def dataset(self):
+        """
+        The Dataset that this object was created from
+        """
+        from . import Dataset
+        if self._dataset is None:
+            dataset = Dataset(self, _validate_vdims=False)
+            if hasattr(self, '_binned'):
+                dataset._binned = self._binned
+            return dataset
+        else:
+            return self._dataset
+
+    @property
+    def pipeline(self):
+        """
+        List of (function, args, kwargs) tuples that represent the
+        sequence of operations used to create this object, starting
+        with the Dataset stored in the dataset property
+        """
+        return self._pipeline
+
+    def execute_pipeline(self, data=None):
+        """
+        Create a new object of the same type by executing the sequence of
+        operations that was used to create this object.
+
+        Args:
+            data: Input data to the pipeline. If None, defaults to the value
+                of the dataset property and the resulting object will equal
+                this object.
+
+        Returns:
+            An object of the same type as this object
+        """
+        new_dataset = self.dataset.clone(data=data, dataset=None, pipeline=[])
+        result = new_dataset
+        for fn, a, kw in self._pipeline:
+            result = fn(result, *a, **kw)
+
+        result._pipeline = copy.copy(self._pipeline)
+        result._dataset = new_dataset
+        return result
 
     def closest(self, coords=[], **kwargs):
         """Snaps coordinate(s) to closest coordinate in Dataset
@@ -880,20 +990,18 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides):
             datatypes = [self.interface.datatype] + self.datatype
             overrides['datatype'] = list(util.unique_iterator(datatypes))
 
-        if 'dataset' in overrides:
-            dataset = overrides.pop('dataset')
-        else:
-            dataset = self.dataset
+        if 'dataset' not in overrides:
+            overrides['dataset'] = self.dataset
 
-        new_dataset = super(Dataset, self).clone(data, shared_data, new_type, *args, **overrides)
+        if 'pipeline' not in overrides:
+            overrides['pipeline'] = self._pipeline
 
-        if dataset is not None:
-            try:
-                new_dataset._dataset = dataset.clone(data=new_dataset.data, dataset=None)
-            except DataError:
-                # New dataset doesn't have the necessary dimensions to
-                # propagate dataset. 
Do nothing - pass + if data is None: + overrides['_validate_vdims'] = False + + new_dataset = super(Dataset, self).clone( + data, shared_data, new_type, *args, **overrides + ) return new_dataset diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index c97b2858c0..a7ef82222c 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -40,7 +40,25 @@ def __init__(self, msg, interface=None): super(DataError, self).__init__(msg) -class iloc(object): +class Accessor(object): + def __init__(self, dataset): + self.dataset = dataset + + def __getitem__(self, index): + from ..data import Dataset + res = self._perform_getitem(self.dataset, index) + if isinstance(res, Dataset): + res._pipeline = self.dataset.pipeline + [ + (getattr(type(self), '_perform_getitem'), [index], {}) + ] + return res + + @classmethod + def _perform_getitem(cls, dataset, index): + raise NotImplementedError() + + +class iloc(Accessor): """ iloc is small wrapper object that allows row, column based indexing into a Dataset using the ``.iloc`` property. It supports @@ -48,11 +66,8 @@ class iloc(object): integer indices, slices, lists and arrays of values. For more information see the ``Dataset.iloc`` property docstring. """ - - def __init__(self, dataset): - self.dataset = dataset - - def __getitem__(self, index): + @classmethod + def _perform_getitem(cls, dataset, index): index = util.wrap_tuple(index) if len(index) == 1: index = (index[0], slice(None)) @@ -63,32 +78,32 @@ def __getitem__(self, index): rows, cols = index if rows is Ellipsis: rows = slice(None) - data = self.dataset.interface.iloc(self.dataset.dataset, (rows, cols)) - kdims = self.dataset.kdims - vdims = self.dataset.vdims + data = dataset.interface.iloc(dataset.dataset, (rows, cols)) + kdims = dataset.kdims + vdims = dataset.vdims if np.isscalar(data): return data elif cols == slice(None): pass else: if isinstance(cols, slice): - dims = self.dataset.dimensions()[index[1]] + dims = dataset.dimensions()[index[1]] elif np.isscalar(cols): - dims = [self.dataset.get_dimension(cols)] + dims = [dataset.get_dimension(cols)] else: - dims = [self.dataset.get_dimension(d) for d in cols] + dims = [dataset.get_dimension(d) for d in cols] kdims = [d for d in dims if d in kdims] vdims = [d for d in dims if d in vdims] - datatype = [dt for dt in self.dataset.datatype + datatype = [dt for dt in dataset.datatype if dt in Interface.interfaces and not Interface.interfaces[dt].gridded] if not datatype: datatype = ['dataframe', 'dictionary'] - return self.dataset.clone(data, kdims=kdims, vdims=vdims, - datatype=datatype) + return dataset.clone(data, kdims=kdims, vdims=vdims, + datatype=datatype) -class ndloc(object): +class ndloc(Accessor): """ ndloc is a small wrapper object that allows ndarray-like indexing for gridded Datasets using the ``.ndloc`` property. It supports @@ -96,22 +111,19 @@ class ndloc(object): integer indices, slices, lists and arrays of values. For more information see the ``Dataset.ndloc`` property docstring. 
""" - - def __init__(self, dataset): - self.dataset = dataset - - def __getitem__(self, indices): - ds = self.dataset + @classmethod + def _perform_getitem(cls, dataset, indices): + ds = dataset indices = util.wrap_tuple(indices) if not ds.interface.gridded: raise IndexError('Cannot use ndloc on non nd-dimensional datastructure') - selected = self.dataset.interface.ndloc(ds, indices) + selected = dataset.interface.ndloc(ds, indices) if np.isscalar(selected): return selected params = {} if hasattr(ds, 'bounds'): params['bounds'] = None - return self.dataset.clone(selected, datatype=[ds.interface.datatype]+ds.datatype, **params) + return dataset.clone(selected, datatype=[ds.interface.datatype]+ds.datatype, **params) class Interface(param.Parameterized): diff --git a/holoviews/core/data/multipath.py b/holoviews/core/data/multipath.py index b0ff823a29..fa4053683c 100644 --- a/holoviews/core/data/multipath.py +++ b/holoviews/core/data/multipath.py @@ -59,7 +59,7 @@ def validate(cls, dataset, vdims=True): return from holoviews.element import Polygons - ds = cls._inner_dataset_template(dataset) + ds = cls._inner_dataset_template(dataset, validate_vdims=vdims) for d in dataset.data: ds.data = d ds.interface.validate(ds, vdims) @@ -76,7 +76,7 @@ def validate(cls, dataset, vdims=True): @classmethod - def _inner_dataset_template(cls, dataset): + def _inner_dataset_template(cls, dataset, validate_vdims=True): """ Returns a Dataset template used as a wrapper around the data contained within the multi-interface dataset. @@ -84,7 +84,8 @@ def _inner_dataset_template(cls, dataset): from . import Dataset vdims = dataset.vdims if getattr(dataset, 'level', None) is None else [] return Dataset(dataset.data[0], datatype=cls.subtypes, - kdims=dataset.kdims, vdims=vdims) + kdims=dataset.kdims, vdims=vdims, + _validate_vdims=validate_vdims) @classmethod def dimension_type(cls, dataset, dim): diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 70415f151f..887d3575e7 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -486,41 +486,8 @@ def __init__(self, data, id=None, plot_id=None, **params): This class also has an id instance attribute, which may be set to associate some custom options with the object. """ - from . import Dataset, DataError self.data = data - # Handle initializing the dataset property. - self._dataset = None - input_dataset = params.pop('dataset', None) - if type(self) is Dataset: - self._dataset = self - elif input_dataset is not None: - # Clone dimension info from input dataset with reference to new - # data. This way we keep the metadata for all of the dimensions. - try: - self._dataset = input_dataset.clone(data=self.data) - except DataError: - # Dataset not compatible with input data - pass - if self._dataset is None: - # Create a default Dataset to wrap input data - try: - kdims = list(params.get('kdims', [])) - vdims = list(params.get('vdims', [])) - dims = kdims + vdims - dataset = Dataset( - self.data, - kdims=dims if dims else None - ) - if len(dataset.dimensions()) == 0: - # No dimensions could be auto-detected in data - raise DataError("No dimensions detected") - self._dataset = dataset - except DataError: - # Data not supported by any storage backend. leave _dataset as - # None - pass - self._id = None self.id = id self._plot_id = plot_id or util.builtins.id(self) @@ -542,10 +509,6 @@ def __init__(self, data, id=None, plot_id=None, **params): raise ValueError("Supplied label %r contains invalid characters." 
% self.label) - @property - def dataset(self): - return self._dataset - @property def id(self): return self._id diff --git a/holoviews/core/operation.py b/holoviews/core/operation.py index 85902fff84..ad0026c9b1 100644 --- a/holoviews/core/operation.py +++ b/holoviews/core/operation.py @@ -8,7 +8,7 @@ from .layout import Layout from .overlay import NdOverlay, Overlay from .spaces import Callable, HoloMap -from . import util +from . import util, Dataset class Operation(param.ParameterizedFunction): @@ -121,6 +121,12 @@ def _apply(self, element, key=None): ret = self._process(element, key) for hook in self._postprocess_hooks: ret = hook(self, ret, **kwargs) + + if isinstance(ret, Dataset) and isinstance(element, Dataset): + ret._dataset = element.dataset.clone() + ret._pipeline = element.pipeline + [ + (self.instance(), [], dict(self.p)) + ] return ret diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 2b36f33aa4..662653c8c5 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -1,5 +1,4 @@ import numpy as np -import copy import param from ..streams import BoundsXY @@ -225,61 +224,8 @@ def __init__(self, data, edges=None, **params): elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]): data = data[::-1] - self._operation_kwargs = params.pop('_operation_kwargs', None) - - dataset = params.pop("dataset", None) super(Histogram, self).__init__(data, **params) - if dataset: - # Histogram is a special case in which we keep the data from the - # input dataset rather than replace it with the element data. - # This is so that dataset contains the data needed to reconstruct - # the element. - self._dataset = dataset.clone() - - def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): - if 'dataset' in overrides: - dataset = overrides.pop('dataset', None) - else: - dataset = self.dataset - - overrides["dataset"] = None - - new_element = super(Histogram, self).clone( - data=data, - shared_data=shared_data, - new_type=new_type, - _operation_kwargs=copy.deepcopy(self._operation_kwargs), - *args, - **overrides - ) - - if dataset: - # Histogram is a special case in which we keep the data from the - # input dataset rather than replace it with the element data. - # This is so that dataset contains the data needed to reconstruct - # the element. - new_element._dataset = dataset.clone() - - return new_element - - def select(self, selection_specs=None, **selection): - selected = super(Histogram, self).select( - selection_specs=selection_specs, **selection - ) - - if not np.isscalar(selected) and not np.array_equal(selected.data, self.data): - # Selection changed histogram bins, so update dataset - selection = { - dim: sel for dim, sel in selection.items() - if dim in self.dimensions()+['selection_mask'] - } - - if selected._dataset is not None: - selected._dataset = self.dataset.select(**selection) - - return selected - def _get_selection_expr_for_stream_value(self, **kwargs): from ..util.transform import dim diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index c785b23684..acedf62c34 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -656,19 +656,12 @@ def _process(self, element, key=None): if self.p.normed in (True, 'integral'): hist *= edges[1]-edges[0] - # Save off the kwargs needed to reproduce this Histogram later. 
- # We remove the properties that are used as instructions for how to - # calculate the bins, and replace those with the explicit list of bin - # edges. This way, not only can we regenerate this exact histogram - # from the same data set, but we can also generate a histogram using - # a different dataset that will share the exact same bins. - exclusions = {'log', 'bin_range', 'num_bins'} - params['_operation_kwargs'] = { - k: v for k, v in self.p.items() if k not in exclusions - } - params['_operation_kwargs']['bins'] = list(edges) + # Save off the computed bin edges so that if this operation instance + # is used to compute another histogram, it will default to the same + # bin edges. + self.bins = list(edges) return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)], - label=element.label, dataset=element.dataset, **params) + label=element.label, **params) class decimate(Operation): diff --git a/holoviews/operation/timeseries.py b/holoviews/operation/timeseries.py index 6f1246d782..d9d2018cc1 100644 --- a/holoviews/operation/timeseries.py +++ b/holoviews/operation/timeseries.py @@ -17,7 +17,7 @@ class RollingBase(param.Parameterized): Whether to set the x-coordinate at the center or right edge of the window.""") - min_periods = param.Integer(default=None, doc=""" + min_periods = param.Integer(default=None, allow_None=True, doc=""" Minimum number of observations in window required to have a value (otherwise result is NaN).""") @@ -35,7 +35,7 @@ class rolling(Operation,RollingBase): Applies a function over a rolling window. """ - window_type = param.ObjectSelector(default=None, + window_type = param.ObjectSelector(default=None, allow_None=True, objects=['boxcar', 'triang', 'blackman', 'hamming', 'bartlett', 'parzen', 'bohman', 'blackmanharris', 'nuttall', 'barthann', 'kaiser', 'gaussian', 'general_gaussian', @@ -72,7 +72,7 @@ class resample(Operation): """ closed = param.ObjectSelector(default=None, objects=['left', 'right'], - doc="Which side of bin interval is closed") + doc="Which side of bin interval is closed", allow_None=True) function = param.Callable(default=np.mean, doc=""" Function for computing new values out of existing ones.""") diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index 6296fe75ea..d90106de2b 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -1,7 +1,11 @@ from holoviews.element.comparison import ComparisonTestCase import pandas as pd from holoviews import Dataset, Curve, Dimension, Scatter, Distribution +from holoviews.operation import histogram +from holoviews.operation.datashader import dynspread, datashade, rasterize import dask.dataframe as dd +import numpy as np + class DatasetPropertyTestCase(ComparisonTestCase): @@ -23,36 +27,82 @@ def setUp(self): ] ) + self.ds2 = Dataset( + self.df.iloc[2:], + kdims=[ + Dimension('a', label="The a Column"), + Dimension('b', label="The b Column"), + Dimension('c', label="The c Column"), + Dimension('d', label="The d Column"), + ] + ) + class ConstructorTestCase(DatasetPropertyTestCase): def test_constructors_dataset(self): - expected = Dataset(self.df) - self.assertIs(expected, expected.dataset) + ds = Dataset(self.df) + self.assertIs(ds, ds.dataset) + + # Check pipeline + pipeline = ds.pipeline + self.assertEqual(len(pipeline), 1) + self.assertIs(pipeline[0][0], Dataset) + self.assertEqual(ds, ds.execute_pipeline()) def test_constructor_curve(self): element = Curve(self.df) - expected = 
Dataset(self.df) + expected = Dataset( + self.df, + kdims=self.df.columns[0], + vdims=self.df.columns[1:].tolist(), + ) self.assertEqual(element.dataset, expected) + # Check pipeline + pipeline = element.pipeline + self.assertEqual(len(pipeline), 1) + self.assertIs(pipeline[0][0], Curve) + self.assertEqual(element, element.execute_pipeline()) + class ToTestCase(DatasetPropertyTestCase): def test_to_element(self): curve = self.ds.to(Curve, 'a', 'b', groupby=[]) + curve2 = self.ds2.to(Curve, 'a', 'b', groupby=[]) + self.assertNotEqual(curve, curve2) + self.assertEqual(curve.dataset, self.ds) scatter = curve.to(Scatter) self.assertEqual(scatter.dataset, self.ds) + # Check pipeline + pipeline = curve.pipeline + self.assertEqual(len(pipeline), 2) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + + # Execute pipeline + self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual( + curve.execute_pipeline(self.ds2), curve2 + ) + def test_to_holomap(self): curve_hmap = self.ds.to(Curve, 'a', 'b', groupby=['c']) # Check HoloMap element datasets for v in self.df.c.drop_duplicates(): curve = curve_hmap.data[(v,)] + + # check dataset self.assertEqual( - curve.dataset, self.ds.select(c=v) + curve.dataset, self.ds ) + # execute pipeline + self.assertEqual(curve.execute_pipeline(), curve) + def test_to_holomap_dask(self): ddf = dd.from_pandas(self.df, npartitions=2) dds = Dataset( @@ -71,9 +121,12 @@ def test_to_holomap_dask(self): for v in self.df.c.drop_duplicates(): curve = curve_hmap.data[(v,)] self.assertEqual( - curve.dataset, self.ds.select(c=v) + curve.dataset, self.ds ) + # Execute pipeline + self.assertEqual(curve.execute_pipeline(), curve) + class CloneTestCase(DatasetPropertyTestCase): def test_clone(self): @@ -81,73 +134,473 @@ def test_clone(self): self.assertEqual(self.ds.clone().dataset, self.ds) # Curve + curve = self.ds.to.curve('a', 'b', groupby=[]) + curve_clone = curve.clone() self.assertEqual( - self.ds.to.curve('a', 'b', groupby=[]).clone().dataset, + curve_clone.dataset, self.ds ) + # Check pipeline carried over + self.assertEqual(curve.pipeline, curve_clone.pipeline[:2]) + + # Execute pipeline + self.assertEqual(curve.execute_pipeline(), curve) + class ReindexTestCase(DatasetPropertyTestCase): def test_reindex_dataset(self): ds_ab = self.ds.reindex(kdims=['a'], vdims=['b']) + ds2_ab = self.ds2.reindex(kdims=['a'], vdims=['b']) + self.assertNotEqual(ds_ab, ds2_ab) + self.assertEqual(ds_ab.dataset, self.ds) + # Check pipeline + pipeline = ds_ab.pipeline + self.assertEqual(len(pipeline), 2) + self.assertIs(pipeline[0][0], Dataset) + self.assertTrue(callable(pipeline[1][0])) + self.assertEqual(pipeline[1][0].__name__, 'reindex') + self.assertEqual(pipeline[1][1], []) + self.assertEqual(pipeline[1][2], dict(kdims=['a'], vdims=['b'])) + + # Execute pipeline + self.assertEqual(ds_ab.execute_pipeline(), ds_ab) + self.assertEqual( + ds_ab.execute_pipeline(self.ds2), ds2_ab + ) + def test_double_reindex_dataset(self): - ds_abc = self.ds.reindex(kdims=['a'], vdims=['b', 'c']) - ds_ab = ds_abc.reindex(kdims=['a'], vdims=['b']) + ds_ab = (self.ds + .reindex(kdims=['a'], vdims=['b', 'c']) + .reindex(kdims=['a'], vdims=['b'])) + ds2_ab = (self.ds2 + .reindex(kdims=['a'], vdims=['b', 'c']) + .reindex(kdims=['a'], vdims=['b'])) + self.assertNotEqual(ds_ab, ds2_ab) + self.assertEqual(ds_ab.dataset, self.ds) + # Check pipeline + pipeline = ds_ab.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + 
self.assertTrue(callable(pipeline[1][0])) + self.assertEqual(pipeline[1][0].__name__, 'reindex') + self.assertEqual(pipeline[1][1], []) + self.assertEqual(pipeline[1][2], dict(kdims=['a'], vdims=['b', 'c'])) + self.assertEqual(pipeline[2][0].__name__, 'reindex') + self.assertEqual(pipeline[2][1], []) + self.assertEqual(pipeline[2][2], dict(kdims=['a'], vdims=['b'])) + + # Execute pipeline + self.assertEqual(ds_ab.execute_pipeline(), ds_ab) + self.assertEqual( + ds_ab.execute_pipeline(self.ds2), ds2_ab + ) + def test_reindex_curve(self): - curve_ab = self.ds.to(Curve, 'a', 'b', groupby=[]) - curve_ba = curve_ab.reindex(kdims='b', vdims='a') - self.assertEqual(curve_ab.dataset, self.ds) + curve_ba = self.ds.to( + Curve, 'a', 'b', groupby=[] + ).reindex(kdims='b', vdims='a') + curve2_ba = self.ds2.to( + Curve, 'a', 'b', groupby=[] + ).reindex(kdims='b', vdims='a') + self.assertNotEqual(curve_ba, curve2_ba) + self.assertEqual(curve_ba.dataset, self.ds) + # Check pipeline + pipeline = curve_ba.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertTrue(callable(pipeline[2][0])) + self.assertEqual(pipeline[2][0].__name__, 'reindex') + self.assertEqual(pipeline[2][1], []) + self.assertEqual(pipeline[2][2], dict(kdims='b', vdims='a')) + + # Execute pipeline + self.assertEqual(curve_ba.execute_pipeline(), curve_ba) + self.assertEqual( + curve_ba.execute_pipeline(self.ds2), curve2_ba + ) + def test_double_reindex_curve(self): - curve_abc = self.ds.to(Curve, 'a', ['b', 'c'], groupby=[]) - curve_ab = curve_abc.reindex(kdims='a', vdims='b') - curve_ba = curve_ab.reindex(kdims='b', vdims='a') - self.assertEqual(curve_ab.dataset, self.ds) + curve_ba = self.ds.to( + Curve, 'a', ['b', 'c'], groupby=[] + ).reindex(kdims='a', vdims='b').reindex(kdims='b', vdims='a') + curve2_ba = self.ds2.to( + Curve, 'a', ['b', 'c'], groupby=[] + ).reindex(kdims='a', vdims='b').reindex(kdims='b', vdims='a') + self.assertNotEqual(curve_ba, curve2_ba) + self.assertEqual(curve_ba.dataset, self.ds) + # Check pipeline + pipeline = curve_ba.pipeline + self.assertEqual(len(pipeline), 4) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertTrue(callable(pipeline[2][0])) + self.assertEqual(pipeline[2][0].__name__, 'reindex') + self.assertEqual(pipeline[2][1], []) + self.assertEqual(pipeline[2][2], dict(kdims='a', vdims='b')) + self.assertEqual(pipeline[3][0].__name__, 'reindex') + self.assertEqual(pipeline[3][1], []) + self.assertEqual(pipeline[3][2], dict(kdims='b', vdims='a')) + + # Execute pipeline + self.assertEqual(curve_ba.execute_pipeline(), curve_ba) + self.assertEqual( + curve_ba.execute_pipeline(self.ds2), curve2_ba + ) + class IlocTestCase(DatasetPropertyTestCase): def test_iloc_dataset(self): - expected = self.ds.iloc[[0, 2]] + ds_iloc = self.ds.iloc[[0, 2]] + ds2_iloc = self.ds2.iloc[[0, 2]] + self.assertNotEqual(ds_iloc, ds2_iloc) # Dataset self.assertEqual( - self.ds.clone().iloc[[0, 2]].dataset, - expected + ds_iloc.dataset, + self.ds ) - def test_iloc_curve(self): - expected = self.ds.iloc[[0, 2]] + # Check pipeline + pipeline = ds_iloc.pipeline + self.assertEqual(len(pipeline), 2) + self.assertIs(pipeline[0][0], Dataset) + self.assertTrue(callable(pipeline[1][0])) + self.assertEqual(pipeline[1][0].__name__, '_perform_getitem') + self.assertEqual(pipeline[1][1], [[0, 2]]) + self.assertEqual(pipeline[1][2], {}) + + # Execute pipeline + self.assertEqual(ds_iloc.execute_pipeline(), ds_iloc) + 
self.assertEqual( + ds_iloc.execute_pipeline(self.ds2), ds2_iloc + ) + def test_iloc_curve(self): # Curve - curve = self.ds.to.curve('a', 'b', groupby=[]) + curve_iloc = self.ds.to.curve('a', 'b', groupby=[]).iloc[[0, 2]] + curve2_iloc = self.ds2.to.curve('a', 'b', groupby=[]).iloc[[0, 2]] + self.assertNotEqual(curve_iloc, curve2_iloc) + + self.assertEqual( + curve_iloc.dataset, + self.ds + ) + + # Check pipeline + pipeline = curve_iloc.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertTrue(callable(pipeline[2][0])) + self.assertEqual(pipeline[2][0].__name__, '_perform_getitem') + self.assertEqual(pipeline[2][1], [[0, 2]]) + self.assertEqual(pipeline[2][2], {}) + + # Execute pipeline + self.assertEqual(curve_iloc.execute_pipeline(), curve_iloc) self.assertEqual( - curve.iloc[[0, 2]].dataset, - expected + curve_iloc.execute_pipeline(self.ds2), curve2_iloc + ) + + +class NdlocTestCase(DatasetPropertyTestCase): + def setUp(self): + super(NdlocTestCase, self).setUp() + self.ds_grid = Dataset( + (np.arange(4), + np.arange(3), + np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12]])), + kdims=['x', 'y'], + vdims='z' + ) + + self.ds2_grid = Dataset( + (np.arange(3), + np.arange(3), + np.array([[1, 2, 4], + [5, 6, 8], + [9, 10, 12]])), + kdims=['x', 'y'], + vdims='z' + ) + + def test_ndloc_dataset(self): + ds_grid_ndloc = self.ds_grid.ndloc[0:2, 1:3] + ds2_grid_ndloc = self.ds2_grid.ndloc[0:2, 1:3] + self.assertNotEqual(ds_grid_ndloc, ds2_grid_ndloc) + + # Dataset + self.assertEqual( + ds_grid_ndloc.dataset, + self.ds_grid + ) + + # Check pipeline + pipeline = ds_grid_ndloc.pipeline + self.assertEqual(len(pipeline), 2) + self.assertIs(pipeline[0][0], Dataset) + self.assertTrue(callable(pipeline[1][0])) + self.assertEqual(pipeline[1][0].__name__, '_perform_getitem') + self.assertEqual( + pipeline[1][1], [(slice(0, 2, None), slice(1, 3, None))] + ) + self.assertEqual(pipeline[1][2], {}) + + # Execute pipeline + self.assertEqual(ds_grid_ndloc.execute_pipeline(), ds_grid_ndloc) + self.assertEqual( + ds_grid_ndloc.execute_pipeline(self.ds2_grid), ds2_grid_ndloc ) class SelectTestCase(DatasetPropertyTestCase): def test_select_dataset(self): + ds_select = self.ds.select(b=10) + ds2_select = self.ds2.select(b=10) + self.assertNotEqual(ds_select, ds2_select) + + # Dataset self.assertEqual( - self.ds.select(b=10).dataset, - self.ds.select(b=10) + ds_select.dataset, + self.ds + ) + + # Check pipeline + pipeline = ds_select.pipeline + self.assertEqual(len(pipeline), 2) + self.assertIs(pipeline[0][0], Dataset) + self.assertTrue(callable(pipeline[1][0])) + self.assertEqual(pipeline[1][0].__name__, 'select') + self.assertEqual(pipeline[1][1], []) + self.assertEqual(pipeline[1][2], {'b': 10}) + + # Execute pipeline + self.assertEqual(ds_select.execute_pipeline(), ds_select) + self.assertEqual( + ds_select.execute_pipeline(self.ds2), ds2_select ) def test_select_curve(self): + curve_select = self.ds.to.curve('a', 'b', groupby=[]).select(b=10) + curve2_select = self.ds2.to.curve('a', 'b', groupby=[]).select(b=10) + self.assertNotEqual(curve_select, curve2_select) + # Curve self.assertEqual( - self.ds.to.curve('a', 'b', groupby=[]).select(b=10).dataset, - self.ds.select(b=10) + curve_select.dataset, + self.ds + ) + + # Check pipeline + pipeline = curve_select.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + 
self.assertTrue(callable(pipeline[2][0])) + self.assertEqual(pipeline[2][0].__name__, 'select') + self.assertEqual(pipeline[2][1], []) + self.assertEqual(pipeline[2][2], {'b': 10}) + + # Execute pipeline + self.assertEqual(curve_select.execute_pipeline(), curve_select) + self.assertEqual( + curve_select.execute_pipeline(self.ds2), curve2_select + ) + + +class SortTestCase(DatasetPropertyTestCase): + def test_sort_curve(self): + curve_sorted = self.ds.to.curve('a', 'b', groupby=[]).sort('a') + curve_sorted2 = self.ds2.to.curve('a', 'b', groupby=[]).sort('a') + self.assertNotEqual(curve_sorted, curve_sorted2) + + # Curve + self.assertEqual( + curve_sorted.dataset, + self.ds + ) + + # Check pipeline + pipeline = curve_sorted.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertTrue(callable(pipeline[2][0])) + self.assertEqual(pipeline[2][0].__name__, 'sort') + self.assertEqual(pipeline[2][1], ['a']) + self.assertEqual(pipeline[2][2], {}) + + # Execute pipeline + self.assertEqual(curve_sorted.execute_pipeline(), curve_sorted) + self.assertEqual( + curve_sorted.execute_pipeline(self.ds2), curve_sorted2 + ) + + +class SampleTestCase(DatasetPropertyTestCase): + def test_sample_curve(self): + curve_sampled = self.ds.to.curve('a', 'b', groupby=[]).sample([1, 2]) + curve_sampled2 = self.ds2.to.curve('a', 'b', groupby=[]).sample([1, 2]) + self.assertNotEqual(curve_sampled, curve_sampled2) + + # Curve + self.assertEqual( + curve_sampled.dataset, + self.ds + ) + + # Check pipeline + pipeline = curve_sampled.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertTrue(callable(pipeline[2][0])) + self.assertEqual(pipeline[2][0].__name__, 'sample') + self.assertEqual(pipeline[2][1], [[1, 2]]) + self.assertEqual(pipeline[2][2], {}) + + # Execute pipeline + self.assertEqual(curve_sampled.execute_pipeline(), curve_sampled) + self.assertEqual( + curve_sampled.execute_pipeline(self.ds2), curve_sampled2 + ) + + +class ReduceTestCase(DatasetPropertyTestCase): + def test_reduce_dataset(self): + ds_reduced = self.ds.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).reduce('c', function=np.sum) + + ds2_reduced = self.ds2.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).reduce('c', function=np.sum) + + self.assertNotEqual(ds_reduced, ds2_reduced) + self.assertEqual(ds_reduced.dataset, self.ds) + self.assertEqual(ds2_reduced.dataset, self.ds2) + + # Check pipeline + pipeline = ds_reduced.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertTrue(callable(pipeline[1][0])) + self.assertEqual(pipeline[1][0].__name__, 'reindex') + self.assertEqual(pipeline[2][0].__name__, 'reduce') + self.assertEqual(pipeline[2][1], ['c']) + self.assertEqual(pipeline[2][2], {'function': np.sum}) + + # Execute pipeline + self.assertEqual(ds_reduced.execute_pipeline(), ds_reduced) + self.assertEqual( + ds_reduced.execute_pipeline(self.ds2), ds2_reduced ) +class AggregateTestCase(DatasetPropertyTestCase): + def test_aggregate_dataset(self): + ds_aggregated = self.ds.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).aggregate('b', function=np.sum) + + ds2_aggregated = self.ds2.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).aggregate('b', function=np.sum) + + self.assertNotEqual(ds_aggregated, ds2_aggregated) + self.assertEqual(ds_aggregated.dataset, self.ds) + self.assertEqual(ds2_aggregated.dataset, self.ds2) + + # Check 
pipeline
+        pipeline = ds_aggregated.pipeline
+        self.assertEqual(len(pipeline), 3)
+        self.assertIs(pipeline[0][0], Dataset)
+        self.assertTrue(callable(pipeline[1][0]))
+        self.assertEqual(pipeline[1][0].__name__, 'reindex')
+        self.assertEqual(pipeline[2][0].__name__, 'aggregate')
+        self.assertEqual(pipeline[2][1], ['b'])
+        self.assertEqual(pipeline[2][2], {'function': np.sum})
+
+        # Execute pipeline
+        self.assertEqual(ds_aggregated.execute_pipeline(), ds_aggregated)
+        self.assertEqual(
+            ds_aggregated.execute_pipeline(self.ds2), ds2_aggregated
+        )
+
+
+class GroupbyTestCase(DatasetPropertyTestCase):
+    def test_groupby_dataset(self):
+        ds_groups = self.ds.reindex(
+            kdims=['b', 'c'], vdims=['a', 'd']
+        ).groupby('b')
+
+        ds2_groups = self.ds2.reindex(
+            kdims=['b', 'c'], vdims=['a', 'd']
+        ).groupby('b')
+
+        self.assertNotEqual(ds_groups, ds2_groups)
+        for k in ds_groups.keys():
+            ds_group = ds_groups[k]
+            ds2_group = ds2_groups[k]
+
+            # Check pipeline
+            pipeline = ds_group.pipeline
+            self.assertEqual(len(pipeline), 4)
+            self.assertIs(pipeline[0][0], Dataset)
+            self.assertEqual(pipeline[1][0].__name__, 'reindex')
+            self.assertEqual(pipeline[2][0].__name__, 'groupby')
+            self.assertEqual(pipeline[2][1], ['b'])
+            self.assertEqual(pipeline[3][0].__name__, '__getitem__')
+            self.assertEqual(pipeline[3][1], [k])
+
+            # Execute pipeline
+            self.assertEqual(ds_group.execute_pipeline(), ds_group)
+            self.assertEqual(
+                ds_group.execute_pipeline(self.ds2), ds2_group
+            )
+
+
+class AddDimensionTestCase(DatasetPropertyTestCase):
+    def test_add_dimension_dataset(self):
+        ds_dim_added = self.ds.add_dimension('new', 1, 17)
+        ds2_dim_added = self.ds2.add_dimension('new', 1, 17)
+        self.assertNotEqual(ds_dim_added, ds2_dim_added)
+
+        # Check dataset
+        self.assertEqual(ds_dim_added.dataset, self.ds)
+        self.assertEqual(ds2_dim_added.dataset, self.ds2)
+
+        # Check pipeline
+        pipeline = ds_dim_added.pipeline
+        self.assertEqual(len(pipeline), 2)
+        self.assertIs(pipeline[0][0], Dataset)
+        self.assertEqual(pipeline[1][0].__name__, 'add_dimension')
+        self.assertEqual(pipeline[1][1], ['new', 1, 17])
+        self.assertEqual(pipeline[1][2], {})
+
+        # Execute pipeline
+        self.assertEqual(ds_dim_added.execute_pipeline(), ds_dim_added)
+        self.assertEqual(
+            ds_dim_added.execute_pipeline(self.ds2), ds2_dim_added,
+        )
+
+
+#
+# Add execute pipeline test for each method, using a different dataset (ds2)
+#
 
 class HistogramTestCase(DatasetPropertyTestCase):
 
     def setUp(self):
@@ -162,7 +615,20 @@ def test_clone(self):
 
     def test_select_single(self):
         sub_hist = self.hist.select(a=(1, None))
-        self.assertEqual(sub_hist.dataset, self.ds.select(a=(1, None)))
+        self.assertEqual(sub_hist.dataset, self.ds)
+
+        # Check pipeline
+        pipeline = sub_hist.pipeline
+        self.assertEqual(len(pipeline), 3)
+        self.assertIs(pipeline[0][0], Dataset)
+        self.assertIsInstance(pipeline[1][0], histogram)
+        self.assertTrue(callable(pipeline[2][0]))
+        self.assertEqual(pipeline[2][0].__name__, 'select')
+        self.assertEqual(pipeline[2][1], [])
+        self.assertEqual(pipeline[2][2], {'a': (1, None)})
+
+        # Execute pipeline
+        self.assertEqual(sub_hist.execute_pipeline(), sub_hist)
 
     def test_select_multi(self):
         # Add second selection on b. 
b is a dimension in hist.dataset but @@ -175,14 +641,38 @@ def test_select_multi(self): self.ds.select(a=(1, None), b=100) ) + # Check dataset unchanged self.assertEqual( sub_hist.dataset, - self.ds.select(a=(1, None)) + self.ds ) + # Check pipeline + pipeline = sub_hist.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIsInstance(pipeline[1][0], histogram) + self.assertTrue(callable(pipeline[2][0])) + self.assertEqual(pipeline[2][0].__name__, 'select') + self.assertEqual(pipeline[2][1], []) + self.assertEqual(pipeline[2][2], {'a': (1, None), 'b': 100}) + + # Execute pipeline + self.assertEqual(sub_hist.execute_pipeline(), sub_hist) + def test_hist_to_curve(self): # No exception thrown - self.hist.to.curve() + curve = self.hist.to.curve() + + # Check pipeline + pipeline = curve.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIsInstance(pipeline[1][0], histogram) + self.assertIs(pipeline[2][0], Curve) + + # Execute pipeline + self.assertEqual(curve.execute_pipeline(), curve) class DistributionTestCase(DatasetPropertyTestCase): @@ -193,3 +683,58 @@ def setUp(self): def test_distribution_dataset(self): self.assertEqual(self.distribution.dataset, self.ds) + + # Execute pipeline + self.assertEqual( + self.distribution.execute_pipeline(), self.distribution + ) + + +class DatashaderTestCase(DatasetPropertyTestCase): + + def test_rasterize_curve(self): + img = rasterize( + self.ds.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ) + img2 = rasterize( + self.ds2.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ) + self.assertNotEqual(img, img2) + + # Check dataset + self.assertEqual(img.dataset, self.ds) + + # Check pipeline + pipeline = img.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertIsInstance(pipeline[2][0], rasterize) + + # Execute pipeline + self.assertEqual(img.execute_pipeline(), img) + self.assertEqual(img.execute_pipeline(self.ds2), img2) + + def test_datashade_curve(self): + rgb = dynspread(datashade( + self.ds.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ), dynamic=False) + rgb2 = dynspread(datashade( + self.ds2.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ), dynamic=False) + self.assertNotEqual(rgb, rgb2) + + # Check dataset + self.assertEqual(rgb.dataset, self.ds) + + # Check pipeline + pipeline = rgb.pipeline + self.assertEqual(len(pipeline), 4) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertIsInstance(pipeline[2][0], datashade) + self.assertIsInstance(pipeline[3][0], dynspread) + + # Execute pipeline + self.assertEqual(rgb.execute_pipeline(), rgb) + self.assertEqual(rgb.execute_pipeline(self.ds2), rgb2) diff --git a/holoviews/tests/operation/testoperation.py b/holoviews/tests/operation/testoperation.py index caec21c96e..5459c7e39d 100644 --- a/holoviews/tests/operation/testoperation.py +++ b/holoviews/tests/operation/testoperation.py @@ -147,46 +147,6 @@ def test_points_histogram(self): vdims=('x_frequency', 'Frequency')) self.assertEqual(op_hist, hist) - def test_histogram_operation_kwargs(self): - points = Points([float(j) for i in range(10) for j in [i] * (2 * i)]) - op_hist = histogram( - points, - dimension='y', - normed=False, - num_bins=10, - bin_range=[0, 10], - ) - - hist = Histogram(( - [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] - ), vdims=('y_count', 'Count'), kdims='y') - - 
# Check histogram - self.assertEqual(op_hist, hist) - - # Check operation kwargs for histogram generated with operation - self.assertEqual( - op_hist._operation_kwargs, - {'dimension': 'y', - 'normed': False, - 'dynamic': False, - 'bins': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]} - ) - - # Test that operation_kwargs is preserved through clone - self.assertEqual( - op_hist.clone()._operation_kwargs, - {'dimension': 'y', - 'normed': False, - 'dynamic': False, - 'bins': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]} - ) - - # Check that operation kwargs is None for histogram generated directly - # from the Histogram constructor - self.assertIsNone(hist._operation_kwargs) - @da_skip def test_dataset_histogram_dask(self): import dask.array as da From 702e5313c5e39e3f847c172d2d66cfc965d1eddf Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 08:51:37 -0400 Subject: [PATCH 02/23] Add pipeline support to apply, redim, and opts accessors --- holoviews/core/accessors.py | 46 +++++++++++++++++- holoviews/tests/core/testdatasetproperty.py | 52 +++++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index 56d7bb8e7b..ea355c4014 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -6,11 +6,53 @@ from collections import OrderedDict import param +from param.parameterized import add_metaclass from . import util from .pprint import PrettyPrinter +class AccessorPipelineMeta(type): + def __new__(mcs, classname, bases, classdict): + if '__call__' in classdict: + classdict['__call__'] = mcs.pipelined(classdict['__call__']) + + inst = type.__new__(mcs, classname, bases, classdict) + inst._in_method = False + return inst + + @classmethod + def pipelined(mcs, __call__): + def pipelined_call(*a, **k): + from .data import Dataset, MultiDimensionalMapping + inst = a[0] + in_method = inst._obj._in_method + if not in_method: + inst._obj._in_method = True + + result = __call__(*a, **k) + + if not in_method: + mode = getattr(inst, 'mode', None) + if isinstance(result, Dataset): + result._pipeline = inst._obj._pipeline + [ + (type(inst), [], {'mode': mode}), + (__call__, list(a[1:]), k) + ] + elif isinstance(result, MultiDimensionalMapping): + for key, element in result.items(): + element._pipeline = inst._obj._pipeline + [ + (type(inst), [], {'mode': mode}), + (__call__, list(a[1:]), k), + (getattr(type(result), '__getitem__'), [key], {}) + ] + inst._obj._in_method = False + return result + + return pipelined_call + + +@add_metaclass(AccessorPipelineMeta) class Apply(object): """ Utility to apply a function or operation to all viewable elements @@ -151,7 +193,7 @@ def select(self, **kwargs): return self.__call__('select', **kwargs) - +@add_metaclass(AccessorPipelineMeta) class Redim(object): """ Utility that supports re-dimensioning any HoloViews object via the @@ -306,7 +348,7 @@ def values(self, specs=None, **ranges): return self._redim('values', specs, **ranges) - +@add_metaclass(AccessorPipelineMeta) class Opts(object): def __init__(self, obj, mode=None): diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index d90106de2b..31272a7ddd 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -1,3 +1,4 @@ +from holoviews.core import Apply, Redim from holoviews.element.comparison import ComparisonTestCase import pandas as pd from holoviews import Dataset, Curve, 
Dimension, Scatter, Distribution @@ -738,3 +739,54 @@ def test_datashade_curve(self): # Execute pipeline self.assertEqual(rgb.execute_pipeline(), rgb) self.assertEqual(rgb.execute_pipeline(self.ds2), rgb2) + + +class AccessorTestCase(DatasetPropertyTestCase): + def test_apply_curve(self): + curve = self.ds.to.curve('a', 'b', groupby=[]).apply( + lambda c: Scatter(c.select(b=(20, None)).data) + ) + curve2 = self.ds2.to.curve('a', 'b', groupby=[]).apply( + lambda c: Scatter(c.select(b=(20, None)).data) + ) + self.assertNotEqual(curve, curve2) + + # Check pipeline + pipeline = curve.pipeline + self.assertEqual(len(pipeline), 4) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertIs(pipeline[2][0], Apply) + self.assertEqual(pipeline[2][2], {'mode': None}) + self.assertEqual(pipeline[3][0].__name__, '__call__') + + # Execute pipeline + self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual( + curve.execute_pipeline(self.ds2), curve2 + ) + + def test_redim_curve(self): + curve = self.ds.to.curve('a', 'b', groupby=[]).redim.unit( + a='kg', b='m' + ) + + curve2 = self.ds2.to.curve('a', 'b', groupby=[]).redim.unit( + a='kg', b='m' + ) + self.assertNotEqual(curve, curve2) + + # Check pipeline + pipeline = curve.pipeline + self.assertEqual(len(pipeline), 4) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertIs(pipeline[2][0], Redim) + self.assertEqual(pipeline[2][2], {'mode': 'dataset'}) + self.assertEqual(pipeline[3][0].__name__, '__call__') + + # Execute pipeline + self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual( + curve.execute_pipeline(self.ds2), curve2 + ) From 18d354881ac31221fa93cd1cc6d5d5bdb2e255cc Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 09:12:54 -0400 Subject: [PATCH 03/23] Guard against accessors that wrap objects without pipeline support --- holoviews/core/accessors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index ea355c4014..615e146cff 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -26,6 +26,10 @@ def pipelined(mcs, __call__): def pipelined_call(*a, **k): from .data import Dataset, MultiDimensionalMapping inst = a[0] + if not hasattr(inst._obj, '_pipeline'): + # Wrapped object doesn't support the pipeline property + return __call__(*a, **k) + in_method = inst._obj._in_method if not in_method: inst._obj._in_method = True From 8ed7c5f946858af6354289c1809902678d602668 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 09:19:02 -0400 Subject: [PATCH 04/23] Fix dataset property histogram tests now that apply is added to pipeline --- holoviews/tests/core/testdatasetproperty.py | 34 ++++++++++++--------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index 31272a7ddd..4930116c02 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -620,13 +620,14 @@ def test_select_single(self): # Check pipeline pipeline = sub_hist.pipeline - self.assertEqual(len(pipeline), 3) + self.assertEqual(len(pipeline), 4) self.assertIs(pipeline[0][0], Dataset) - self.assertIsInstance(pipeline[1][0], histogram) - self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, 'select') - self.assertEqual(pipeline[2][1], []) - self.assertEqual(pipeline[2][2], {'a': (1, None)}) + 
self.assertIs(pipeline[1][0], Apply) + self.assertEqual(pipeline[2][0].__name__, '__call__') + self.assertIsInstance(pipeline[2][1][0], histogram) + self.assertEqual(pipeline[3][0].__name__, 'select') + self.assertEqual(pipeline[3][1], []) + self.assertEqual(pipeline[3][2], {'a': (1, None)}) # Execute pipeline self.assertEqual(sub_hist.execute_pipeline(), sub_hist) @@ -650,13 +651,14 @@ def test_select_multi(self): # Check pipeline pipeline = sub_hist.pipeline - self.assertEqual(len(pipeline), 3) + self.assertEqual(len(pipeline), 4) self.assertIs(pipeline[0][0], Dataset) - self.assertIsInstance(pipeline[1][0], histogram) - self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, 'select') - self.assertEqual(pipeline[2][1], []) - self.assertEqual(pipeline[2][2], {'a': (1, None), 'b': 100}) + self.assertIs(pipeline[1][0], Apply) + self.assertEqual(pipeline[2][0].__name__, '__call__') + self.assertIsInstance(pipeline[2][1][0], histogram) + self.assertEqual(pipeline[3][0].__name__, 'select') + self.assertEqual(pipeline[3][1], []) + self.assertEqual(pipeline[3][2], {'a': (1, None), 'b': 100}) # Execute pipeline self.assertEqual(sub_hist.execute_pipeline(), sub_hist) @@ -667,10 +669,12 @@ def test_hist_to_curve(self): # Check pipeline pipeline = curve.pipeline - self.assertEqual(len(pipeline), 3) + self.assertEqual(len(pipeline), 4) self.assertIs(pipeline[0][0], Dataset) - self.assertIsInstance(pipeline[1][0], histogram) - self.assertIs(pipeline[2][0], Curve) + self.assertIs(pipeline[1][0], Apply) + self.assertEqual(pipeline[2][0].__name__, '__call__') + self.assertIsInstance(pipeline[2][1][0], histogram) + self.assertIs(pipeline[3][0], Curve) # Execute pipeline self.assertEqual(curve.execute_pipeline(), curve) From e016dc91640a20beb1e4731af449d16aaa9965d3 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 09:46:22 -0400 Subject: [PATCH 05/23] Copy docstrings to Metaclass wrapping methods --- holoviews/core/accessors.py | 2 ++ holoviews/core/data/__init__.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index 615e146cff..8c3ec1e649 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -53,6 +53,8 @@ def pipelined_call(*a, **k): inst._obj._in_method = False return result + pipelined_call.__doc__ = __call__.__doc__ + return pipelined_call diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 0b4dda011b..28620abe58 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -211,6 +211,8 @@ def pipelined_fn(*a, **k): inst._in_method = False return result + pipelined_fn.__doc__ = method.__doc__ + return pipelined_fn From 11d84393d79017b3d47c7472a5a9311d103cd858 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 09:47:10 -0400 Subject: [PATCH 06/23] change metaclass arg name to mcs --- holoviews/core/data/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 28620abe58..66d2466865 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -174,16 +174,16 @@ class PipelineMeta(ParameterizedMetaclass): # Public methods that should not be wrapped blacklist = ['__init__', 'clone', 'execute_pipeline'] - def __new__(cls, classname, bases, classdict): + def __new__(mcs, classname, bases, classdict): for method_name in classdict: method_fn = classdict[method_name] - if method_name 
in cls.blacklist or method_name.startswith('_'): + if method_name in mcs.blacklist or method_name.startswith('_'): continue elif isinstance(method_fn, types.FunctionType): - classdict[method_name] = cls.pipelined(method_fn) + classdict[method_name] = mcs.pipelined(method_fn) - inst = type.__new__(cls, classname, bases, classdict) + inst = type.__new__(mcs, classname, bases, classdict) inst._in_method = False return inst From 74047230e548e5ac58fbd670f2b76175e2e97cd2 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 09:51:07 -0400 Subject: [PATCH 07/23] Override options method for pipeline support --- holoviews/core/data/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 66d2466865..3cabe19fa6 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -13,7 +13,7 @@ from .. import util from ..accessors import Redim -from ..dimension import Dimension, process_dimensions +from ..dimension import Dimension, process_dimensions, Dimensioned from ..element import Element from ..ndmapping import OrderedDict, MultiDimensionalMapping from ..spaces import HoloMap, DynamicMap @@ -1007,6 +1007,13 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): return new_dataset + def options(self, *args, **kwargs): + # Override so that PipelineMeta finds method to wrap it with pipeline + # support + return super(Dataset, self).options(*args, **kwargs) + + options.__doc__ = Dimensioned.options.__doc__ + @property def iloc(self): """Returns iloc indexer with support for columnar indexing. From 2adf704c7cdce3ce2696ea15181d65192f83a9e3 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 10:03:20 -0400 Subject: [PATCH 08/23] Add pipeline support for Dataset.map --- holoviews/core/data/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 3cabe19fa6..61f15c3faa 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -13,7 +13,9 @@ from .. import util from ..accessors import Redim -from ..dimension import Dimension, process_dimensions, Dimensioned +from ..dimension import ( + Dimension, process_dimensions, Dimensioned, LabelledData +) from ..element import Element from ..ndmapping import OrderedDict, MultiDimensionalMapping from ..spaces import HoloMap, DynamicMap @@ -1007,13 +1009,16 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): return new_dataset + # Overrides of superclass methods that are needed so that PipelineMeta + # will find them to wrap with pipeline support def options(self, *args, **kwargs): - # Override so that PipelineMeta finds method to wrap it with pipeline - # support return super(Dataset, self).options(*args, **kwargs) - options.__doc__ = Dimensioned.options.__doc__ + def map(self, *args, **kwargs): + return super(Dataset, self).map(*args, **kwargs) + map.__doc__ = LabelledData.map.__doc__ + @property def iloc(self): """Returns iloc indexer with support for columnar indexing. 
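Note on the two commits above: PipelineMeta.__new__ only receives the attributes defined directly in the class body being constructed (its classdict), so methods that Dataset merely inherits from Dimensioned or LabelledData are invisible to it and never gain pipeline support. Redefining `options` and `map` as thin super() calls puts them back into Dataset's classdict where the metaclass can wrap them. A minimal standalone sketch of that behaviour follows; the names (WrapMeta, Base, Sub, traced) are illustrative and not part of HoloViews, and it uses Python 3 metaclass syntax instead of the add_metaclass helper used in the patches.

class WrapMeta(type):
    def __new__(mcs, name, bases, classdict):
        # Only names defined in this class body appear in classdict;
        # inherited methods are never seen here.
        for attr, value in list(classdict.items()):
            if callable(value) and not attr.startswith('_'):
                classdict[attr] = mcs.traced(value)
        return type.__new__(mcs, name, bases, classdict)

    @staticmethod
    def traced(fn):
        def wrapper(*args, **kwargs):
            print('traced call: %s' % fn.__name__)
            return fn(*args, **kwargs)
        return wrapper


class Base(object):
    def inherited(self):
        return 'inherited'


class Sub(Base, metaclass=WrapMeta):
    def local(self):
        return 'local'  # wrapped: defined in Sub's own class body

    def inherited(self):
        # Redefining the inherited method, as Dataset does for options
        # and map, is what lets the metaclass wrap it.
        return super(Sub, self).inherited()


Sub().local()      # prints 'traced call: local'
Sub().inherited()  # traced only because of the redefinition above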
From 7ea2ba5c64f6b2960f421637bd0f49e647abf88a Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 10:05:24 -0400 Subject: [PATCH 09/23] standardize names of args to pipelined_call --- holoviews/core/accessors.py | 12 ++++++------ holoviews/core/data/__init__.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index 8c3ec1e649..ec3ab3fc5e 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -23,31 +23,31 @@ def __new__(mcs, classname, bases, classdict): @classmethod def pipelined(mcs, __call__): - def pipelined_call(*a, **k): + def pipelined_call(*args, **kwargs): from .data import Dataset, MultiDimensionalMapping - inst = a[0] + inst = args[0] if not hasattr(inst._obj, '_pipeline'): # Wrapped object doesn't support the pipeline property - return __call__(*a, **k) + return __call__(*args, **kwargs) in_method = inst._obj._in_method if not in_method: inst._obj._in_method = True - result = __call__(*a, **k) + result = __call__(*args, **kwargs) if not in_method: mode = getattr(inst, 'mode', None) if isinstance(result, Dataset): result._pipeline = inst._obj._pipeline + [ (type(inst), [], {'mode': mode}), - (__call__, list(a[1:]), k) + (__call__, list(args[1:]), kwargs) ] elif isinstance(result, MultiDimensionalMapping): for key, element in result.items(): element._pipeline = inst._obj._pipeline + [ (type(inst), [], {'mode': mode}), - (__call__, list(a[1:]), k), + (__call__, list(args[1:]), kwargs), (getattr(type(result), '__getitem__'), [key], {}) ] inst._obj._in_method = False diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 61f15c3faa..06abfa6306 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -191,23 +191,23 @@ def __new__(mcs, classname, bases, classdict): @staticmethod def pipelined(method): - def pipelined_fn(*a, **k): - inst = a[0] + def pipelined_fn(*args, **kwargs): + inst = args[0] in_method = inst._in_method if not in_method: inst._in_method = True - result = method(*a, **k) + result = method(*args, **kwargs) if not in_method: if isinstance(result, Dataset): result._pipeline = inst._pipeline + [ - (method, list(a[1:]), k) + (method, list(args[1:]), kwargs) ] elif isinstance(result, MultiDimensionalMapping): for key, element in result.items(): element._pipeline = inst._pipeline + [ - (method, list(a[1:]), k), + (method, list(args[1:]), kwargs), (getattr(type(result), '__getitem__'), [key], {}) ] inst._in_method = False From 25d7674c0693a44ca754c333e6a1369bf952b371 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 10:19:27 -0400 Subject: [PATCH 10/23] Fix pipeline tests now that `map` is a pipeline step --- holoviews/tests/core/testdatasetproperty.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index 4930116c02..68b3c335a4 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -709,13 +709,6 @@ def test_rasterize_curve(self): # Check dataset self.assertEqual(img.dataset, self.ds) - # Check pipeline - pipeline = img.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertIsInstance(pipeline[2][0], rasterize) - # Execute pipeline self.assertEqual(img.execute_pipeline(), img) self.assertEqual(img.execute_pipeline(self.ds2), img2) @@ 
-732,14 +725,6 @@ def test_datashade_curve(self): # Check dataset self.assertEqual(rgb.dataset, self.ds) - # Check pipeline - pipeline = rgb.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertIsInstance(pipeline[2][0], datashade) - self.assertIsInstance(pipeline[3][0], dynspread) - # Execute pipeline self.assertEqual(rgb.execute_pipeline(), rgb) self.assertEqual(rgb.execute_pipeline(self.ds2), rgb2) From f9e1f734e91d9b2929218772272e11ad03775bc5 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 10:19:47 -0400 Subject: [PATCH 11/23] remove trailing whitespace --- holoviews/core/accessors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index ec3ab3fc5e..55cd8068e1 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -162,7 +162,7 @@ def function(object, **kwargs): mapped.append((k, new_val)) return self._obj.clone(mapped, link=link_inputs) - + def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs): """Applies a aggregate function to all ViewableElements. @@ -226,7 +226,7 @@ def replace_dimensions(cls, dimensions, overrides): list: List of dimensions with replacements applied """ from .dimension import Dimension - + replaced = [] for d in dimensions: if d.name in overrides: From b7eef37f491c5304efecdde97bb8bc23c20d3816 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 10:36:35 -0400 Subject: [PATCH 12/23] Revert "Fix pipeline tests now that `map` is a pipeline step" This reverts commit 25d7674c --- holoviews/tests/core/testdatasetproperty.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index 68b3c335a4..4930116c02 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -709,6 +709,13 @@ def test_rasterize_curve(self): # Check dataset self.assertEqual(img.dataset, self.ds) + # Check pipeline + pipeline = img.pipeline + self.assertEqual(len(pipeline), 3) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertIsInstance(pipeline[2][0], rasterize) + # Execute pipeline self.assertEqual(img.execute_pipeline(), img) self.assertEqual(img.execute_pipeline(self.ds2), img2) @@ -725,6 +732,14 @@ def test_datashade_curve(self): # Check dataset self.assertEqual(rgb.dataset, self.ds) + # Check pipeline + pipeline = rgb.pipeline + self.assertEqual(len(pipeline), 4) + self.assertIs(pipeline[0][0], Dataset) + self.assertIs(pipeline[1][0], Curve) + self.assertIsInstance(pipeline[2][0], datashade) + self.assertIsInstance(pipeline[3][0], dynspread) + # Execute pipeline self.assertEqual(rgb.execute_pipeline(), rgb) self.assertEqual(rgb.execute_pipeline(self.ds2), rgb2) From 965759246120e39d7ec8ff3bc56a4448c5fedd53 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 10:45:51 -0400 Subject: [PATCH 13/23] Handle pipeline functions that return the same element --- holoviews/core/accessors.py | 6 ++++-- holoviews/core/data/__init__.py | 5 +++-- holoviews/core/operation.py | 6 +++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index 55cd8068e1..3b12b11b35 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -4,6 +4,7 @@ from __future__ import absolute_import, 
unicode_literals from collections import OrderedDict +import copy import param from param.parameterized import add_metaclass @@ -30,6 +31,7 @@ def pipelined_call(*args, **kwargs): # Wrapped object doesn't support the pipeline property return __call__(*args, **kwargs) + inst_pipeline = copy.copy(inst._obj. _pipeline) in_method = inst._obj._in_method if not in_method: inst._obj._in_method = True @@ -39,13 +41,13 @@ def pipelined_call(*args, **kwargs): if not in_method: mode = getattr(inst, 'mode', None) if isinstance(result, Dataset): - result._pipeline = inst._obj._pipeline + [ + result._pipeline = inst_pipeline + [ (type(inst), [], {'mode': mode}), (__call__, list(args[1:]), kwargs) ] elif isinstance(result, MultiDimensionalMapping): for key, element in result.items(): - element._pipeline = inst._obj._pipeline + [ + element._pipeline = inst_pipeline + [ (type(inst), [], {'mode': mode}), (__call__, list(args[1:]), kwargs), (getattr(type(result), '__getitem__'), [key], {}) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 06abfa6306..d419f27571 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -193,6 +193,7 @@ def __new__(mcs, classname, bases, classdict): def pipelined(method): def pipelined_fn(*args, **kwargs): inst = args[0] + inst_pipeline = copy.copy(getattr(inst, '_pipeline', None)) in_method = inst._in_method if not in_method: inst._in_method = True @@ -201,12 +202,12 @@ def pipelined_fn(*args, **kwargs): if not in_method: if isinstance(result, Dataset): - result._pipeline = inst._pipeline + [ + result._pipeline = inst_pipeline + [ (method, list(args[1:]), kwargs) ] elif isinstance(result, MultiDimensionalMapping): for key, element in result.items(): - element._pipeline = inst._pipeline + [ + element._pipeline = inst_pipeline + [ (method, list(args[1:]), kwargs), (getattr(type(result), '__getitem__'), [key], {}) ] diff --git a/holoviews/core/operation.py b/holoviews/core/operation.py index ad0026c9b1..b8ce1eb806 100644 --- a/holoviews/core/operation.py +++ b/holoviews/core/operation.py @@ -3,6 +3,7 @@ the purposes of analysis or visualization. 
""" import param +import copy from .dimension import ViewableElement from .element import Element from .layout import Layout @@ -118,13 +119,16 @@ def _apply(self, element, key=None): kwargs = {} for hook in self._preprocess_hooks: kwargs.update(hook(self, element)) + + element_pipeline = copy.copy(getattr(element, '_pipeline', None)) + ret = self._process(element, key) for hook in self._postprocess_hooks: ret = hook(self, ret, **kwargs) if isinstance(ret, Dataset) and isinstance(element, Dataset): ret._dataset = element.dataset.clone() - ret._pipeline = element.pipeline + [ + ret._pipeline = element_pipeline + [ (self.instance(), [], dict(self.p)) ] return ret From cb7cf8dbc13a2cd2199fb34912ad1c74e9568371 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Wed, 18 Sep 2019 17:23:27 -0400 Subject: [PATCH 14/23] Reset the dataset property and empty pipeline when clone replaces data --- holoviews/core/data/__init__.py | 12 ++++++------ holoviews/tests/core/testdatasetproperty.py | 6 ++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index d419f27571..fe8d0b23da 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -995,15 +995,15 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): datatypes = [self.interface.datatype] + self.datatype overrides['datatype'] = list(util.unique_iterator(datatypes)) - if 'dataset' not in overrides: - overrides['dataset'] = self.dataset - - if 'pipeline' not in overrides: - overrides['pipeline'] = self._pipeline - if data is None: overrides['_validate_vdims'] = False + if 'dataset' not in overrides: + overrides['dataset'] = self.dataset + + if 'pipeline' not in overrides: + overrides['pipeline'] = self._pipeline + new_dataset = super(Dataset, self).clone( data, shared_data, new_type, *args, **overrides ) diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index 4930116c02..2ea58fad73 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -148,6 +148,12 @@ def test_clone(self): # Execute pipeline self.assertEqual(curve.execute_pipeline(), curve) + def test_clone_new_data(self): + # Replacing data during clone resets .dataset + ds_clone = self.ds.clone(data=self.ds2.data) + self.assertEqual(ds_clone.dataset, self.ds2) + self.assertEqual(len(ds_clone.pipeline), 1) + class ReindexTestCase(DatasetPropertyTestCase): def test_reindex_dataset(self): From 931b7966fdba6dd2f098fa310026195e227a819d Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Thu, 19 Sep 2019 08:26:56 -0400 Subject: [PATCH 15/23] Propagate dataset property through clone when _in_method --- holoviews/core/data/__init__.py | 5 ++++- holoviews/core/data/interface.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index fe8d0b23da..1d9335a6f7 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -186,7 +186,6 @@ def __new__(mcs, classname, bases, classdict): classdict[method_name] = mcs.pipelined(method_fn) inst = type.__new__(mcs, classname, bases, classdict) - inst._in_method = False return inst @staticmethod @@ -255,6 +254,7 @@ class Dataset(Element): _kdim_reductions = {} def __init__(self, data, kdims=None, vdims=None, **kwargs): + self._in_method = False input_dataset = kwargs.pop('dataset', None) input_pipeline = kwargs.pop('pipeline', []) if 
isinstance(data, Element): @@ -1003,6 +1003,9 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): if 'pipeline' not in overrides: overrides['pipeline'] = self._pipeline + elif self._in_method: + if 'dataset' not in overrides: + overrides['dataset'] = self.dataset new_dataset = super(Dataset, self).clone( data, shared_data, new_type, *args, **overrides diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index a7ef82222c..68f33f922d 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -46,11 +46,13 @@ def __init__(self, dataset): def __getitem__(self, index): from ..data import Dataset + self.dataset._in_method = True res = self._perform_getitem(self.dataset, index) if isinstance(res, Dataset): res._pipeline = self.dataset.pipeline + [ (getattr(type(self), '_perform_getitem'), [index], {}) ] + self.dataset._in_method = False return res @classmethod From 2d1cbaee84cd391bc07e92ec14eb6761f8dafcee Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 20 Sep 2019 05:45:09 -0400 Subject: [PATCH 16/23] support relabel in pipeline --- holoviews/core/data/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 1d9335a6f7..90c940a85f 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -1023,6 +1023,10 @@ def map(self, *args, **kwargs): return super(Dataset, self).map(*args, **kwargs) map.__doc__ = LabelledData.map.__doc__ + def relabel(self, *args, **kwargs): + return super(Dataset, self).relabel(*args, **kwargs) + relabel.__doc__ = LabelledData.relabel.__doc__ + @property def iloc(self): """Returns iloc indexer with support for columnar indexing. From fdfd538e78f6a484656a56e424db7c298740fb37 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 21 Sep 2019 14:25:34 -0400 Subject: [PATCH 17/23] Convert pipeline to be a `chain` operation This removes the `execute_pipeline` method --- holoviews/core/accessors.py | 42 +- holoviews/core/data/__init__.py | 91 ++-- holoviews/core/data/interface.py | 16 +- holoviews/core/operation.py | 11 +- holoviews/operation/element.py | 34 +- holoviews/tests/core/testdatasetproperty.py | 461 ++++++++++---------- 6 files changed, 365 insertions(+), 290 deletions(-) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index 5dac468d5a..382221a4ea 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -26,6 +26,7 @@ def __new__(mcs, classname, bases, classdict): @classmethod def pipelined(mcs, __call__): def pipelined_call(*args, **kwargs): + from ..operation.element import method as method_op, factory from .data import Dataset, MultiDimensionalMapping inst = args[0] if not hasattr(inst._obj, '_pipeline'): @@ -40,19 +41,40 @@ def pipelined_call(*args, **kwargs): result = __call__(*args, **kwargs) if not in_method: - mode = getattr(inst, 'mode', None) + init_op = factory.instance( + output_type=type(inst), + kwargs={'mode': getattr(inst, 'mode', None)}, + ) + call_op = method_op.instance( + input_type=type(inst), + method_name='__call__', + args=list(args[1:]), + kwargs=kwargs, + ) + if isinstance(result, Dataset): - result._pipeline = inst_pipeline + [ - (type(inst), [], {'mode': mode}), - (__call__, list(args[1:]), kwargs) - ] + result._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + init_op, call_op + ], + output_type=type(result), + group=result.group + ) elif isinstance(result, 
MultiDimensionalMapping): for key, element in result.items(): - element._pipeline = inst_pipeline + [ - (type(inst), [], {'mode': mode}), - (__call__, list(args[1:]), kwargs), - (getattr(type(result), '__getitem__'), [key], {}) - ] + getitem_op = method_op.instance( + input_type=type(result), + method_name='__getitem__', + args=[key], + ) + element._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + init_op, call_op, getitem_op + ], + output_type=type(result), + group=element.group + ) + inst._obj._in_method = False return result diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 90c940a85f..c03325f756 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -183,37 +183,57 @@ def __new__(mcs, classname, bases, classdict): if method_name in mcs.blacklist or method_name.startswith('_'): continue elif isinstance(method_fn, types.FunctionType): - classdict[method_name] = mcs.pipelined(method_fn) + classdict[method_name] = mcs.pipelined(method_fn, method_name) inst = type.__new__(mcs, classname, bases, classdict) return inst @staticmethod - def pipelined(method): + def pipelined(method_fn, method_name): def pipelined_fn(*args, **kwargs): + from ...operation.element import method as method_op inst = args[0] inst_pipeline = copy.copy(getattr(inst, '_pipeline', None)) in_method = inst._in_method if not in_method: inst._in_method = True - result = method(*args, **kwargs) + result = method_fn(*args, **kwargs) + + op = method_op.instance( + input_type=type(inst), + method_name=method_name, + args=list(args[1:]), + kwargs=kwargs, + ) if not in_method: if isinstance(result, Dataset): - result._pipeline = inst_pipeline + [ - (method, list(args[1:]), kwargs) - ] + result._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [op], + output_type=type(result), + group=result.group, + ) + elif isinstance(result, MultiDimensionalMapping): for key, element in result.items(): - element._pipeline = inst_pipeline + [ - (method, list(args[1:]), kwargs), - (getattr(type(result), '__getitem__'), [key], {}) - ] + if isinstance(element, Dataset): + getitem_op = method_op.instance( + input_type=type(result), + method_name='__getitem__', + args=[key] + ) + element._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + op, getitem_op + ], + output_type=type(result), + group=element.group + ) inst._in_method = False return result - pipelined_fn.__doc__ = method.__doc__ + pipelined_fn.__doc__ = method_fn.__doc__ return pipelined_fn @@ -254,9 +274,15 @@ class Dataset(Element): _kdim_reductions = {} def __init__(self, data, kdims=None, vdims=None, **kwargs): + from ...operation.element import ( + chain as chain_op, factory + ) self._in_method = False input_dataset = kwargs.pop('dataset', None) - input_pipeline = kwargs.pop('pipeline', []) + input_pipeline = kwargs.pop( + 'pipeline', None + ) + if isinstance(data, Element): pvals = util.get_param_values(data) kwargs.update([(l, pvals[l]) for l in ['group', 'label'] @@ -274,16 +300,23 @@ def __init__(self, data, kdims=None, vdims=None, **kwargs): self.redim = Redim(self, mode='dataset') # Handle _pipeline property - self._pipeline = input_pipeline + [( - type(self), - [], - kwargs, # includes kdims and vdims - )] + if input_pipeline is None: + input_pipeline = chain_op.instance() + + init_op = factory.instance( + output_type=type(self), + args=[], + kwargs=kwargs, + ) + self._pipeline = input_pipeline.instance( + 
operations=input_pipeline.operations + [init_op], + output_type=type(self), group=self.group + ) # Handle initializing the dataset property. self._dataset = None if input_dataset is not None: - self._dataset = input_dataset.clone(dataset=None, pipeline=[]) + self._dataset = input_dataset.clone(dataset=None, pipeline=None) elif type(self) is Dataset: self._dataset = self @@ -311,28 +344,6 @@ def pipeline(self): """ return self._pipeline - def execute_pipeline(self, data=None): - """ - Create a new object of the same type by executing the sequence of - operations that was used to create this object. - - Args: - data: Input data to the pipeline. If None, defaults to the value - of the dataset property and the resulting object will equal the - this object. - - Returns: - An object with the same type as this object - """ - new_dataset = self.dataset.clone(data=data, dataset=None, pipeline=[]) - result = new_dataset - for fn, a, kw in self._pipeline: - result = fn(result, *a, **kw) - - result._pipeline = copy.copy(self._pipeline) - result._dataset = new_dataset - return result - def closest(self, coords=[], **kwargs): """Snaps coordinate(s) to closest coordinate in Dataset diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 68f33f922d..266c5e0076 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -46,12 +46,22 @@ def __init__(self, dataset): def __getitem__(self, index): from ..data import Dataset + from ...operation.element import method self.dataset._in_method = True res = self._perform_getitem(self.dataset, index) if isinstance(res, Dataset): - res._pipeline = self.dataset.pipeline + [ - (getattr(type(self), '_perform_getitem'), [index], {}) - ] + getitem_op = method.instance( + input_type=type(self), + output_type=type(self.dataset), + method_name='_perform_getitem', + args=[index], + ) + res._pipeline = self.dataset.pipeline.instance( + operations=self.dataset.pipeline.operations + [getitem_op], + group=self.dataset.group, + output_type=type(self.dataset) + ) + self.dataset._in_method = False return res diff --git a/holoviews/core/operation.py b/holoviews/core/operation.py index b8ce1eb806..a5c2fcbf98 100644 --- a/holoviews/core/operation.py +++ b/holoviews/core/operation.py @@ -120,7 +120,7 @@ def _apply(self, element, key=None): for hook in self._preprocess_hooks: kwargs.update(hook(self, element)) - element_pipeline = copy.copy(getattr(element, '_pipeline', None)) + element_pipeline = getattr(element, '_pipeline', None) ret = self._process(element, key) for hook in self._postprocess_hooks: @@ -128,9 +128,12 @@ def _apply(self, element, key=None): if isinstance(ret, Dataset) and isinstance(element, Dataset): ret._dataset = element.dataset.clone() - ret._pipeline = element_pipeline + [ - (self.instance(), [], dict(self.p)) - ] + ret._pipeline = element_pipeline.instance( + operations=element_pipeline.operations + [ + self.instance(**self.p) + ], + group=ret.group, + ) return ret diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index acedf62c34..79ba371ba8 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -84,8 +84,38 @@ class factory(Operation): By default, if three overlaid Images elements are supplied, the corresponding RGB element will be returned. 
""") + args = param.List(default=[], doc=""" + The list of positional argument to pass to the factory""") + + kwargs = param.Dict(default={}, doc=""" + The dict of keyword arguments to pass to the factory""") + def _process(self, view, key=None): - return self.p.output_type(view) + return self.p.output_type(view, *self.p.args, **self.p.kwargs) + + +class method(Operation): + """ + Operation that wraps a method call + """ + output_type = param.ClassSelector(class_=type, doc=""" + The output type of the method operation""") + + input_type = param.ClassSelector(class_=type, doc=""" + The object type the method is defined on""") + + method_name = param.String(default='__call__', doc=""" + The method name""") + + args = param.List(default=[], doc=""" + The list of positional argument to pass to the method""") + + kwargs = param.Dict(default={}, doc=""" + The dict of keyword arguments to pass to the method""") + + def _process(self, element, key=None): + fn = getattr(self.p.input_type, self.p.method_name) + return fn(element, *self.p.args, **self.p.kwargs) class chain(Operation): @@ -113,7 +143,6 @@ class chain(Operation): group = param.String(default='Chain', doc=""" The group assigned to the result after having applied the chain.""") - operations = param.List(default=[], class_=Operation, doc=""" A list of Operations (or Operation instances) that are applied on the input from left to right..""") @@ -163,7 +192,6 @@ def _process(self, img, key=None): return img.clone(processed, group=self.p.group) - class image_overlay(Operation): """ Operation to build a overlay of images to a specification from a diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index 2ea58fad73..6cb9e70095 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -45,10 +45,10 @@ def test_constructors_dataset(self): self.assertIs(ds, ds.dataset) # Check pipeline - pipeline = ds.pipeline - self.assertEqual(len(pipeline), 1) - self.assertIs(pipeline[0][0], Dataset) - self.assertEqual(ds, ds.execute_pipeline()) + ops = ds.pipeline.operations + self.assertEqual(len(ops), 1) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ds, ds.pipeline(ds.dataset)) def test_constructor_curve(self): element = Curve(self.df) @@ -61,9 +61,9 @@ def test_constructor_curve(self): # Check pipeline pipeline = element.pipeline - self.assertEqual(len(pipeline), 1) - self.assertIs(pipeline[0][0], Curve) - self.assertEqual(element, element.execute_pipeline()) + self.assertEqual(len(pipeline.operations), 1) + self.assertIs(pipeline.operations[0].output_type, Curve) + self.assertEqual(element, element.pipeline(element.dataset)) class ToTestCase(DatasetPropertyTestCase): @@ -78,15 +78,15 @@ def test_to_element(self): self.assertEqual(scatter.dataset, self.ds) # Check pipeline - pipeline = curve.pipeline - self.assertEqual(len(pipeline), 2) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) + ops = curve.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) # Execute pipeline - self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual(curve.pipeline(curve.dataset), curve) self.assertEqual( - curve.execute_pipeline(self.ds2), curve2 + curve.pipeline(self.ds2), curve2 ) def test_to_holomap(self): @@ -102,7 +102,7 @@ def test_to_holomap(self): ) # execute pipeline - self.assertEqual(curve.execute_pipeline(), curve) + 
self.assertEqual(curve.pipeline(curve.dataset), curve) def test_to_holomap_dask(self): ddf = dd.from_pandas(self.df, npartitions=2) @@ -126,7 +126,7 @@ def test_to_holomap_dask(self): ) # Execute pipeline - self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual(curve.pipeline(curve.dataset), curve) class CloneTestCase(DatasetPropertyTestCase): @@ -143,16 +143,18 @@ def test_clone(self): ) # Check pipeline carried over - self.assertEqual(curve.pipeline, curve_clone.pipeline[:2]) + self.assertEqual( + curve.pipeline.operations, curve_clone.pipeline.operations[:2] + ) # Execute pipeline - self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual(curve.pipeline(curve.dataset), curve) def test_clone_new_data(self): # Replacing data during clone resets .dataset ds_clone = self.ds.clone(data=self.ds2.data) self.assertEqual(ds_clone.dataset, self.ds2) - self.assertEqual(len(ds_clone.pipeline), 1) + self.assertEqual(len(ds_clone.pipeline.operations), 1) class ReindexTestCase(DatasetPropertyTestCase): @@ -164,18 +166,17 @@ def test_reindex_dataset(self): self.assertEqual(ds_ab.dataset, self.ds) # Check pipeline - pipeline = ds_ab.pipeline - self.assertEqual(len(pipeline), 2) - self.assertIs(pipeline[0][0], Dataset) - self.assertTrue(callable(pipeline[1][0])) - self.assertEqual(pipeline[1][0].__name__, 'reindex') - self.assertEqual(pipeline[1][1], []) - self.assertEqual(pipeline[1][2], dict(kdims=['a'], vdims=['b'])) + ops = ds_ab.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[1].args, []) + self.assertEqual(ops[1].kwargs, dict(kdims=['a'], vdims=['b'])) # Execute pipeline - self.assertEqual(ds_ab.execute_pipeline(), ds_ab) + self.assertEqual(ds_ab.pipeline(ds_ab.dataset), ds_ab) self.assertEqual( - ds_ab.execute_pipeline(self.ds2), ds2_ab + ds_ab.pipeline(self.ds2), ds2_ab ) def test_double_reindex_dataset(self): @@ -190,21 +191,20 @@ def test_double_reindex_dataset(self): self.assertEqual(ds_ab.dataset, self.ds) # Check pipeline - pipeline = ds_ab.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertTrue(callable(pipeline[1][0])) - self.assertEqual(pipeline[1][0].__name__, 'reindex') - self.assertEqual(pipeline[1][1], []) - self.assertEqual(pipeline[1][2], dict(kdims=['a'], vdims=['b', 'c'])) - self.assertEqual(pipeline[2][0].__name__, 'reindex') - self.assertEqual(pipeline[2][1], []) - self.assertEqual(pipeline[2][2], dict(kdims=['a'], vdims=['b'])) + ops = ds_ab.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[1].args, []) + self.assertEqual(ops[1].kwargs, dict(kdims=['a'], vdims=['b', 'c'])) + self.assertEqual(ops[2].method_name, 'reindex') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, dict(kdims=['a'], vdims=['b'])) # Execute pipeline - self.assertEqual(ds_ab.execute_pipeline(), ds_ab) + self.assertEqual(ds_ab.pipeline(ds_ab.dataset), ds_ab) self.assertEqual( - ds_ab.execute_pipeline(self.ds2), ds2_ab + ds_ab.pipeline(self.ds2), ds2_ab ) def test_reindex_curve(self): @@ -219,19 +219,18 @@ def test_reindex_curve(self): self.assertEqual(curve_ba.dataset, self.ds) # Check pipeline - pipeline = curve_ba.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - 
self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, 'reindex') - self.assertEqual(pipeline[2][1], []) - self.assertEqual(pipeline[2][2], dict(kdims='b', vdims='a')) + ops = curve_ba.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'reindex') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, dict(kdims='b', vdims='a')) # Execute pipeline - self.assertEqual(curve_ba.execute_pipeline(), curve_ba) + self.assertEqual(curve_ba.pipeline(curve_ba.dataset), curve_ba) self.assertEqual( - curve_ba.execute_pipeline(self.ds2), curve2_ba + curve_ba.pipeline(self.ds2), curve2_ba ) def test_double_reindex_curve(self): @@ -246,22 +245,21 @@ def test_double_reindex_curve(self): self.assertEqual(curve_ba.dataset, self.ds) # Check pipeline - pipeline = curve_ba.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, 'reindex') - self.assertEqual(pipeline[2][1], []) - self.assertEqual(pipeline[2][2], dict(kdims='a', vdims='b')) - self.assertEqual(pipeline[3][0].__name__, 'reindex') - self.assertEqual(pipeline[3][1], []) - self.assertEqual(pipeline[3][2], dict(kdims='b', vdims='a')) + ops = curve_ba.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'reindex') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, dict(kdims='a', vdims='b')) + self.assertEqual(ops[3].method_name, 'reindex') + self.assertEqual(ops[3].args, []) + self.assertEqual(ops[3].kwargs, dict(kdims='b', vdims='a')) # Execute pipeline - self.assertEqual(curve_ba.execute_pipeline(), curve_ba) + self.assertEqual(curve_ba.pipeline(curve_ba.dataset), curve_ba) self.assertEqual( - curve_ba.execute_pipeline(self.ds2), curve2_ba + curve_ba.pipeline(self.ds2), curve2_ba ) @@ -278,18 +276,17 @@ def test_iloc_dataset(self): ) # Check pipeline - pipeline = ds_iloc.pipeline - self.assertEqual(len(pipeline), 2) - self.assertIs(pipeline[0][0], Dataset) - self.assertTrue(callable(pipeline[1][0])) - self.assertEqual(pipeline[1][0].__name__, '_perform_getitem') - self.assertEqual(pipeline[1][1], [[0, 2]]) - self.assertEqual(pipeline[1][2], {}) + ops = ds_iloc.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, '_perform_getitem') + self.assertEqual(ops[1].args, [[0, 2]]) + self.assertEqual(ops[1].kwargs, {}) # Execute pipeline - self.assertEqual(ds_iloc.execute_pipeline(), ds_iloc) + self.assertEqual(ds_iloc.pipeline(ds_iloc.dataset), ds_iloc) self.assertEqual( - ds_iloc.execute_pipeline(self.ds2), ds2_iloc + ds_iloc.pipeline(self.ds2), ds2_iloc ) def test_iloc_curve(self): @@ -304,19 +301,18 @@ def test_iloc_curve(self): ) # Check pipeline - pipeline = curve_iloc.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, '_perform_getitem') - self.assertEqual(pipeline[2][1], [[0, 2]]) - self.assertEqual(pipeline[2][2], {}) + ops = curve_iloc.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + 
self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, '_perform_getitem') + self.assertEqual(ops[2].args, [[0, 2]]) + self.assertEqual(ops[2].kwargs, {}) # Execute pipeline - self.assertEqual(curve_iloc.execute_pipeline(), curve_iloc) + self.assertEqual(curve_iloc.pipeline(curve_iloc.dataset), curve_iloc) self.assertEqual( - curve_iloc.execute_pipeline(self.ds2), curve2_iloc + curve_iloc.pipeline(self.ds2), curve2_iloc ) @@ -355,20 +351,21 @@ def test_ndloc_dataset(self): ) # Check pipeline - pipeline = ds_grid_ndloc.pipeline - self.assertEqual(len(pipeline), 2) - self.assertIs(pipeline[0][0], Dataset) - self.assertTrue(callable(pipeline[1][0])) - self.assertEqual(pipeline[1][0].__name__, '_perform_getitem') + ops = ds_grid_ndloc.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, '_perform_getitem') self.assertEqual( - pipeline[1][1], [(slice(0, 2, None), slice(1, 3, None))] + ops[1].args, [(slice(0, 2, None), slice(1, 3, None))] ) - self.assertEqual(pipeline[1][2], {}) + self.assertEqual(ops[1].kwargs, {}) # Execute pipeline - self.assertEqual(ds_grid_ndloc.execute_pipeline(), ds_grid_ndloc) self.assertEqual( - ds_grid_ndloc.execute_pipeline(self.ds2_grid), ds2_grid_ndloc + ds_grid_ndloc.pipeline(ds_grid_ndloc.dataset), ds_grid_ndloc + ) + self.assertEqual( + ds_grid_ndloc.pipeline(self.ds2_grid), ds2_grid_ndloc ) @@ -386,18 +383,17 @@ def test_select_dataset(self): ) # Check pipeline - pipeline = ds_select.pipeline - self.assertEqual(len(pipeline), 2) - self.assertIs(pipeline[0][0], Dataset) - self.assertTrue(callable(pipeline[1][0])) - self.assertEqual(pipeline[1][0].__name__, 'select') - self.assertEqual(pipeline[1][1], []) - self.assertEqual(pipeline[1][2], {'b': 10}) + ops = ds_select.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'select') + self.assertEqual(ops[1].args, []) + self.assertEqual(ops[1].kwargs, {'b': 10}) # Execute pipeline - self.assertEqual(ds_select.execute_pipeline(), ds_select) + self.assertEqual(ds_select.pipeline(ds_select.dataset), ds_select) self.assertEqual( - ds_select.execute_pipeline(self.ds2), ds2_select + ds_select.pipeline(self.ds2), ds2_select ) def test_select_curve(self): @@ -412,19 +408,20 @@ def test_select_curve(self): ) # Check pipeline - pipeline = curve_select.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, 'select') - self.assertEqual(pipeline[2][1], []) - self.assertEqual(pipeline[2][2], {'b': 10}) + ops = curve_select.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'select') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, {'b': 10}) # Execute pipeline - self.assertEqual(curve_select.execute_pipeline(), curve_select) self.assertEqual( - curve_select.execute_pipeline(self.ds2), curve2_select + curve_select.pipeline(curve_select.dataset), curve_select + ) + self.assertEqual( + curve_select.pipeline(self.ds2), curve2_select ) @@ -441,19 +438,20 @@ def test_sort_curve(self): ) # Check pipeline - pipeline = curve_sorted.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], 
Curve) - self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, 'sort') - self.assertEqual(pipeline[2][1], ['a']) - self.assertEqual(pipeline[2][2], {}) + ops = curve_sorted.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'sort') + self.assertEqual(ops[2].args, ['a']) + self.assertEqual(ops[2].kwargs, {}) # Execute pipeline - self.assertEqual(curve_sorted.execute_pipeline(), curve_sorted) self.assertEqual( - curve_sorted.execute_pipeline(self.ds2), curve_sorted2 + curve_sorted.pipeline(curve_sorted.dataset), curve_sorted + ) + self.assertEqual( + curve_sorted.pipeline(self.ds2), curve_sorted2 ) @@ -470,19 +468,20 @@ def test_sample_curve(self): ) # Check pipeline - pipeline = curve_sampled.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertTrue(callable(pipeline[2][0])) - self.assertEqual(pipeline[2][0].__name__, 'sample') - self.assertEqual(pipeline[2][1], [[1, 2]]) - self.assertEqual(pipeline[2][2], {}) + ops = curve_sampled.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'sample') + self.assertEqual(ops[2].args, [[1, 2]]) + self.assertEqual(ops[2].kwargs, {}) # Execute pipeline - self.assertEqual(curve_sampled.execute_pipeline(), curve_sampled) self.assertEqual( - curve_sampled.execute_pipeline(self.ds2), curve_sampled2 + curve_sampled.pipeline(curve_sampled.dataset), curve_sampled + ) + self.assertEqual( + curve_sampled.pipeline(self.ds2), curve_sampled2 ) @@ -501,19 +500,18 @@ def test_reduce_dataset(self): self.assertEqual(ds2_reduced.dataset, self.ds2) # Check pipeline - pipeline = ds_reduced.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertTrue(callable(pipeline[1][0])) - self.assertEqual(pipeline[1][0].__name__, 'reindex') - self.assertEqual(pipeline[2][0].__name__, 'reduce') - self.assertEqual(pipeline[2][1], ['c']) - self.assertEqual(pipeline[2][2], {'function': np.sum}) + ops = ds_reduced.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[2].method_name, 'reduce') + self.assertEqual(ops[2].args, ['c']) + self.assertEqual(ops[2].kwargs, {'function': np.sum}) # Execute pipeline - self.assertEqual(ds_reduced.execute_pipeline(), ds_reduced) + self.assertEqual(ds_reduced.pipeline(ds_reduced.dataset), ds_reduced) self.assertEqual( - ds_reduced.execute_pipeline(self.ds2), ds2_reduced + ds_reduced.pipeline(self.ds2), ds2_reduced ) @@ -532,19 +530,20 @@ def test_aggregate_dataset(self): self.assertEqual(ds2_aggregated.dataset, self.ds2) # Check pipeline - pipeline = ds_aggregated.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertTrue(callable(pipeline[1][0])) - self.assertEqual(pipeline[1][0].__name__, 'reindex') - self.assertEqual(pipeline[2][0].__name__, 'aggregate') - self.assertEqual(pipeline[2][1], ['b']) - self.assertEqual(pipeline[2][2], {'function': np.sum}) + ops = ds_aggregated.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[2].method_name, 'aggregate') + 
self.assertEqual(ops[2].args, ['b']) + self.assertEqual(ops[2].kwargs, {'function': np.sum}) # Execute pipeline - self.assertEqual(ds_aggregated.execute_pipeline(), ds_aggregated) self.assertEqual( - ds_aggregated.execute_pipeline(self.ds2), ds2_aggregated + ds_aggregated.pipeline(ds_aggregated.dataset), ds_aggregated + ) + self.assertEqual( + ds_aggregated.pipeline(self.ds2), ds2_aggregated ) @@ -564,19 +563,19 @@ def test_groupby_dataset(self): ds2_group = ds2_groups[k] # Check pipeline - pipeline = ds_group.pipeline - self.assertNotEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertEqual(pipeline[1][0].__name__, 'reindex') - self.assertEqual(pipeline[2][0].__name__, 'groupby') - self.assertEqual(pipeline[2][1], ['b']) - self.assertEqual(pipeline[3][0].__name__, '__getitem__') - self.assertEqual(pipeline[3][1], [k]) + ops = ds_group.pipeline.operations + self.assertNotEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[2].method_name, 'groupby') + self.assertEqual(ops[2].args, ['b']) + self.assertEqual(ops[3].method_name, '__getitem__') + self.assertEqual(ops[3].args, [k]) # Execute pipeline - self.assertEqual(ds_group.execute_pipeline(), ds_group) + self.assertEqual(ds_group.pipeline(ds_group.dataset), ds_group) self.assertEqual( - ds_group.execute_pipeline(self.ds2), ds2_group + ds_group.pipeline(self.ds2), ds2_group ) @@ -591,21 +590,22 @@ def test_add_dimension_dataset(self): self.assertEqual(ds2_dim_added.dataset, self.ds2) # Check pipeline - pipeline = ds_dim_added.pipeline - self.assertEqual(len(pipeline), 2) - self.assertIs(pipeline[0][0], Dataset) - self.assertEqual(pipeline[1][0].__name__, 'add_dimension') - self.assertEqual(pipeline[1][1], ['new', 1, 17]) - self.assertEqual(pipeline[1][2], {}) + ops = ds_dim_added.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'add_dimension') + self.assertEqual(ops[1].args, ['new', 1, 17]) + self.assertEqual(ops[1].kwargs, {}) # Execute pipeline - self.assertEqual(ds_dim_added.execute_pipeline(), ds_dim_added) self.assertEqual( - ds_dim_added.execute_pipeline(self.ds2), ds2_dim_added, + ds_dim_added.pipeline(ds_dim_added.dataset), ds_dim_added + ) + self.assertEqual( + ds_dim_added.pipeline(self.ds2), ds2_dim_added, ) -# # Add execute pipeline test for each method, using a different dataset (ds2) # class HistogramTestCase(DatasetPropertyTestCase): @@ -625,18 +625,18 @@ def test_select_single(self): self.assertEqual(sub_hist.dataset, self.ds) # Check pipeline - pipeline = sub_hist.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Apply) - self.assertEqual(pipeline[2][0].__name__, '__call__') - self.assertIsInstance(pipeline[2][1][0], histogram) - self.assertEqual(pipeline[3][0].__name__, 'select') - self.assertEqual(pipeline[3][1], []) - self.assertEqual(pipeline[3][2], {'a': (1, None)}) + ops = sub_hist.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Apply) + self.assertEqual(ops[2].method_name, '__call__') + self.assertIsInstance(ops[2].args[0], histogram) + self.assertEqual(ops[3].method_name, 'select') + self.assertEqual(ops[3].args, []) + self.assertEqual(ops[3].kwargs, {'a': (1, None)}) # Execute pipeline - self.assertEqual(sub_hist.execute_pipeline(), sub_hist) + 
self.assertEqual(sub_hist.pipeline(sub_hist.dataset), sub_hist) def test_select_multi(self): # Add second selection on b. b is a dimension in hist.dataset but @@ -656,34 +656,34 @@ def test_select_multi(self): ) # Check pipeline - pipeline = sub_hist.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Apply) - self.assertEqual(pipeline[2][0].__name__, '__call__') - self.assertIsInstance(pipeline[2][1][0], histogram) - self.assertEqual(pipeline[3][0].__name__, 'select') - self.assertEqual(pipeline[3][1], []) - self.assertEqual(pipeline[3][2], {'a': (1, None), 'b': 100}) + ops = sub_hist.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Apply) + self.assertEqual(ops[2].method_name, '__call__') + self.assertIsInstance(ops[2].args[0], histogram) + self.assertEqual(ops[3].method_name, 'select') + self.assertEqual(ops[3].args, []) + self.assertEqual(ops[3].kwargs, {'a': (1, None), 'b': 100}) # Execute pipeline - self.assertEqual(sub_hist.execute_pipeline(), sub_hist) + self.assertEqual(sub_hist.pipeline(sub_hist.dataset), sub_hist) def test_hist_to_curve(self): # No exception thrown curve = self.hist.to.curve() # Check pipeline - pipeline = curve.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Apply) - self.assertEqual(pipeline[2][0].__name__, '__call__') - self.assertIsInstance(pipeline[2][1][0], histogram) - self.assertIs(pipeline[3][0], Curve) + ops = curve.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Apply) + self.assertEqual(ops[2].method_name, '__call__') + self.assertIsInstance(ops[2].args[0], histogram) + self.assertIs(ops[3].output_type, Curve) # Execute pipeline - self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual(curve.pipeline(curve.dataset), curve) class DistributionTestCase(DatasetPropertyTestCase): @@ -697,7 +697,8 @@ def test_distribution_dataset(self): # Execute pipeline self.assertEqual( - self.distribution.execute_pipeline(), self.distribution + self.distribution.pipeline(self.distribution.dataset), + self.distribution, ) @@ -716,15 +717,15 @@ def test_rasterize_curve(self): self.assertEqual(img.dataset, self.ds) # Check pipeline - pipeline = img.pipeline - self.assertEqual(len(pipeline), 3) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertIsInstance(pipeline[2][0], rasterize) + ops = img.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertIsInstance(ops[2], rasterize) # Execute pipeline - self.assertEqual(img.execute_pipeline(), img) - self.assertEqual(img.execute_pipeline(self.ds2), img2) + self.assertEqual(img.pipeline(img.dataset), img) + self.assertEqual(img.pipeline(self.ds2), img2) def test_datashade_curve(self): rgb = dynspread(datashade( @@ -739,16 +740,16 @@ def test_datashade_curve(self): self.assertEqual(rgb.dataset, self.ds) # Check pipeline - pipeline = rgb.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertIsInstance(pipeline[2][0], datashade) - self.assertIsInstance(pipeline[3][0], dynspread) + ops = rgb.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + 
self.assertIs(ops[1].output_type, Curve) + self.assertIsInstance(ops[2], datashade) + self.assertIsInstance(ops[3], dynspread) # Execute pipeline - self.assertEqual(rgb.execute_pipeline(), rgb) - self.assertEqual(rgb.execute_pipeline(self.ds2), rgb2) + self.assertEqual(rgb.pipeline(rgb.dataset), rgb) + self.assertEqual(rgb.pipeline(self.ds2), rgb2) class AccessorTestCase(DatasetPropertyTestCase): @@ -762,18 +763,18 @@ def test_apply_curve(self): self.assertNotEqual(curve, curve2) # Check pipeline - pipeline = curve.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertIs(pipeline[2][0], Apply) - self.assertEqual(pipeline[2][2], {'mode': None}) - self.assertEqual(pipeline[3][0].__name__, '__call__') + ops = curve.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertIs(ops[2].output_type, Apply) + self.assertEqual(ops[2].kwargs, {'mode': None}) + self.assertEqual(ops[3].method_name, '__call__') # Execute pipeline - self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual(curve.pipeline(curve.dataset), curve) self.assertEqual( - curve.execute_pipeline(self.ds2), curve2 + curve.pipeline(self.ds2), curve2 ) def test_redim_curve(self): @@ -787,16 +788,16 @@ def test_redim_curve(self): self.assertNotEqual(curve, curve2) # Check pipeline - pipeline = curve.pipeline - self.assertEqual(len(pipeline), 4) - self.assertIs(pipeline[0][0], Dataset) - self.assertIs(pipeline[1][0], Curve) - self.assertIs(pipeline[2][0], Redim) - self.assertEqual(pipeline[2][2], {'mode': 'dataset'}) - self.assertEqual(pipeline[3][0].__name__, '__call__') + ops = curve.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertIs(ops[2].output_type, Redim) + self.assertEqual(ops[2].kwargs, {'mode': 'dataset'}) + self.assertEqual(ops[3].method_name, '__call__') # Execute pipeline - self.assertEqual(curve.execute_pipeline(), curve) + self.assertEqual(curve.pipeline(curve.dataset), curve) self.assertEqual( - curve.execute_pipeline(self.ds2), curve2 + curve.pipeline(self.ds2), curve2 ) From 1ddb23778c92b5ec8313ef563f5088acfd772924 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 21 Sep 2019 14:38:12 -0400 Subject: [PATCH 18/23] Fix tests --- holoviews/plotting/bokeh/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py index 2a00673a73..08f3707694 100644 --- a/holoviews/plotting/bokeh/stats.py +++ b/holoviews/plotting/bokeh/stats.py @@ -338,7 +338,7 @@ def _kde_data(self, el, key, **kwargs): if self.clip: vdim = vdim(range=self.clip) el = el.clone(vdims=[vdim]) - kde = univariate_kde(el, dimension=vdim, **kwargs) + kde = univariate_kde(el, dimension=vdim.name, **kwargs) xs, ys = (kde.dimension_values(i) for i in range(2)) mask = isfinite(ys) & (ys>0) # Mask out non-finite and zero values xs, ys = xs[mask], ys[mask] From a6108297a85ac944733d71c130338eb337487269 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sun, 22 Sep 2019 05:29:18 -0400 Subject: [PATCH 19/23] Update pipeline docstring --- holoviews/core/data/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index c03325f756..ee97df0844 100644 --- a/holoviews/core/data/__init__.py +++ 
b/holoviews/core/data/__init__.py @@ -338,9 +338,9 @@ def dataset(self): @property def pipeline(self): """ - List of (function, args, kwargs) tuples that represents the sequence - of operations that was used to create this object, starting - with the Dataset stored in dataset property + Chain operation that evaluates the sequence of operations that was + used to create this object, starting with the Dataset stored in + dataset property """ return self._pipeline From 1ac15d3865cb3a2b26d011dbc1159dd794b482c6 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sun, 22 Sep 2019 05:32:12 -0400 Subject: [PATCH 20/23] Remove execute_pipeline from blacklist --- holoviews/core/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index ee97df0844..d918862fe9 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -174,7 +174,7 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None, class PipelineMeta(ParameterizedMetaclass): # Public methods that should not be wrapped - blacklist = ['__init__', 'clone', 'execute_pipeline'] + blacklist = ['__init__', 'clone'] def __new__(mcs, classname, bases, classdict): From c2a5a96c3155dca9541e8042a77997eaf6b3ee40 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sun, 22 Sep 2019 06:09:10 -0400 Subject: [PATCH 21/23] Use try/finally when setting _in_method to avoid inconsistent state in the presence of exceptions --- holoviews/core/accessors.py | 66 +++++++++++++++--------------- holoviews/core/data/__init__.py | 69 +++++++++++++++++--------------- holoviews/core/data/interface.py | 36 +++++++++-------- 3 files changed, 90 insertions(+), 81 deletions(-) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index 382221a4ea..fdfd9afd8c 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -20,7 +20,6 @@ def __new__(mcs, classname, bases, classdict): classdict['__call__'] = mcs.pipelined(classdict['__call__']) inst = type.__new__(mcs, classname, bases, classdict) - inst._in_method = False return inst @classmethod @@ -38,44 +37,47 @@ def pipelined_call(*args, **kwargs): if not in_method: inst._obj._in_method = True - result = __call__(*args, **kwargs) + try: + result = __call__(*args, **kwargs) - if not in_method: - init_op = factory.instance( - output_type=type(inst), - kwargs={'mode': getattr(inst, 'mode', None)}, - ) - call_op = method_op.instance( - input_type=type(inst), - method_name='__call__', - args=list(args[1:]), - kwargs=kwargs, - ) - - if isinstance(result, Dataset): - result._pipeline = inst_pipeline.instance( - operations=inst_pipeline.operations + [ - init_op, call_op - ], - output_type=type(result), - group=result.group + if not in_method: + init_op = factory.instance( + output_type=type(inst), + kwargs={'mode': getattr(inst, 'mode', None)}, ) - elif isinstance(result, MultiDimensionalMapping): - for key, element in result.items(): - getitem_op = method_op.instance( - input_type=type(result), - method_name='__getitem__', - args=[key], - ) - element._pipeline = inst_pipeline.instance( + call_op = method_op.instance( + input_type=type(inst), + method_name='__call__', + args=list(args[1:]), + kwargs=kwargs, + ) + + if isinstance(result, Dataset): + result._pipeline = inst_pipeline.instance( operations=inst_pipeline.operations + [ - init_op, call_op, getitem_op + init_op, call_op ], output_type=type(result), - group=element.group + group=result.group ) + elif isinstance(result, 
MultiDimensionalMapping): + for key, element in result.items(): + getitem_op = method_op.instance( + input_type=type(result), + method_name='__getitem__', + args=[key], + ) + element._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + init_op, call_op, getitem_op + ], + output_type=type(result), + group=element.group + ) + finally: + if not in_method: + inst._obj._in_method = False - inst._obj._in_method = False return result pipelined_call.__doc__ = __call__.__doc__ diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index d918862fe9..ec09222293 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -198,39 +198,42 @@ def pipelined_fn(*args, **kwargs): if not in_method: inst._in_method = True - result = method_fn(*args, **kwargs) - - op = method_op.instance( - input_type=type(inst), - method_name=method_name, - args=list(args[1:]), - kwargs=kwargs, - ) - - if not in_method: - if isinstance(result, Dataset): - result._pipeline = inst_pipeline.instance( - operations=inst_pipeline.operations + [op], - output_type=type(result), - group=result.group, - ) - - elif isinstance(result, MultiDimensionalMapping): - for key, element in result.items(): - if isinstance(element, Dataset): - getitem_op = method_op.instance( - input_type=type(result), - method_name='__getitem__', - args=[key] - ) - element._pipeline = inst_pipeline.instance( - operations=inst_pipeline.operations + [ - op, getitem_op - ], - output_type=type(result), - group=element.group - ) - inst._in_method = False + try: + result = method_fn(*args, **kwargs) + + op = method_op.instance( + input_type=type(inst), + method_name=method_name, + args=list(args[1:]), + kwargs=kwargs, + ) + + if not in_method: + if isinstance(result, Dataset): + result._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [op], + output_type=type(result), + group=result.group, + ) + + elif isinstance(result, MultiDimensionalMapping): + for key, element in result.items(): + if isinstance(element, Dataset): + getitem_op = method_op.instance( + input_type=type(result), + method_name='__getitem__', + args=[key] + ) + element._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + op, getitem_op + ], + output_type=type(result), + group=element.group + ) + finally: + if not in_method: + inst._in_method = False return result pipelined_fn.__doc__ = method_fn.__doc__ diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 266c5e0076..819dbf408d 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -47,22 +47,26 @@ def __init__(self, dataset): def __getitem__(self, index): from ..data import Dataset from ...operation.element import method - self.dataset._in_method = True - res = self._perform_getitem(self.dataset, index) - if isinstance(res, Dataset): - getitem_op = method.instance( - input_type=type(self), - output_type=type(self.dataset), - method_name='_perform_getitem', - args=[index], - ) - res._pipeline = self.dataset.pipeline.instance( - operations=self.dataset.pipeline.operations + [getitem_op], - group=self.dataset.group, - output_type=type(self.dataset) - ) - - self.dataset._in_method = False + in_method = self.dataset._in_method + if not in_method: + self.dataset._in_method = True + try: + res = self._perform_getitem(self.dataset, index) + if not in_method and isinstance(res, Dataset): + getitem_op = method.instance( + input_type=type(self), + 
output_type=type(self.dataset), + method_name='_perform_getitem', + args=[index], + ) + res._pipeline = self.dataset.pipeline.instance( + operations=self.dataset.pipeline.operations + [getitem_op], + group=self.dataset.group, + output_type=type(self.dataset) + ) + finally: + if not in_method: + self.dataset._in_method = False return res @classmethod From 50bd22aedac7ed0bd459eec2600fcb4c115c8fea Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Mon, 23 Sep 2019 06:00:36 -0400 Subject: [PATCH 22/23] Make chain operation default to group of element produced by last operation --- holoviews/core/accessors.py | 2 -- holoviews/core/data/__init__.py | 4 +--- holoviews/core/data/interface.py | 1 - holoviews/core/operation.py | 1 - holoviews/operation/element.py | 10 +++++++--- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index fdfd9afd8c..212494f14a 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -58,7 +58,6 @@ def pipelined_call(*args, **kwargs): init_op, call_op ], output_type=type(result), - group=result.group ) elif isinstance(result, MultiDimensionalMapping): for key, element in result.items(): @@ -72,7 +71,6 @@ def pipelined_call(*args, **kwargs): init_op, call_op, getitem_op ], output_type=type(result), - group=element.group ) finally: if not in_method: diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index ec09222293..441c0537ce 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -213,7 +213,6 @@ def pipelined_fn(*args, **kwargs): result._pipeline = inst_pipeline.instance( operations=inst_pipeline.operations + [op], output_type=type(result), - group=result.group, ) elif isinstance(result, MultiDimensionalMapping): @@ -229,7 +228,6 @@ def pipelined_fn(*args, **kwargs): op, getitem_op ], output_type=type(result), - group=element.group ) finally: if not in_method: @@ -313,7 +311,7 @@ def __init__(self, data, kdims=None, vdims=None, **kwargs): ) self._pipeline = input_pipeline.instance( operations=input_pipeline.operations + [init_op], - output_type=type(self), group=self.group + output_type=type(self), ) # Handle initializing the dataset property. diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 819dbf408d..7ddf63454f 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -61,7 +61,6 @@ def __getitem__(self, index): ) res._pipeline = self.dataset.pipeline.instance( operations=self.dataset.pipeline.operations + [getitem_op], - group=self.dataset.group, output_type=type(self.dataset) ) finally: diff --git a/holoviews/core/operation.py b/holoviews/core/operation.py index a5c2fcbf98..e1d288d9b9 100644 --- a/holoviews/core/operation.py +++ b/holoviews/core/operation.py @@ -132,7 +132,6 @@ def _apply(self, element, key=None): operations=element_pipeline.operations + [ self.instance(**self.p) ], - group=ret.group, ) return ret diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index 79ba371ba8..dd9ad05934 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -140,8 +140,9 @@ class chain(Operation): The output type of the chain operation. 
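Note on patch 22: the round-trip assertions introduced with patch 17's tests, of the form element.pipeline(element.dataset) == element, only hold if replaying the chain preserves the element's group; under the previous default of group='Chain', every replayed element came back relabeled and the equality failed. A sketch of the behavior this change protects, assuming HoloViews with this series applied (the data is made up):

    import pandas as pd
    import holoviews as hv

    ds = hv.Dataset(pd.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30]}))
    curve = ds.to(hv.Curve, 'a', 'b')

    # Replaying the recorded chain on the original dataset must reproduce
    # the element exactly, including its group.
    replayed = curve.pipeline(curve.dataset)
    assert replayed.group == curve.group   # 'Curve', not 'Chain'
    assert replayed == curve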
Must be supplied if the chain is to be used as a channel operation.""") - group = param.String(default='Chain', doc=""" - The group assigned to the result after having applied the chain.""") + group = param.String(default='', doc=""" + The group assigned to the result after having applied the chain. + Defaults to the group produced by the last operation in the chain""") operations = param.List(default=[], class_=Operation, doc=""" A list of Operations (or Operation instances) @@ -153,7 +154,10 @@ def _process(self, view, key=None): processed = operation.process_element(processed, key, input_ranges=self.p.input_ranges) - return processed.clone(group=self.p.group) + if not self.p.group: + return processed + else: + return processed.clone(group=self.p.group) class transform(Operation): From ba9b4df7d8edc915b88415b09a50472e43529824 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Mon, 23 Sep 2019 06:05:31 -0400 Subject: [PATCH 23/23] unused import --- holoviews/core/operation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/holoviews/core/operation.py b/holoviews/core/operation.py index e1d288d9b9..ddccd6d947 100644 --- a/holoviews/core/operation.py +++ b/holoviews/core/operation.py @@ -3,7 +3,6 @@ the purposes of analysis or visualization. """ import param -import copy from .dimension import ViewableElement from .element import Element from .layout import Layout