diff --git a/holoviews/core/accessors.py b/holoviews/core/accessors.py index 2e30020da3..212494f14a 100644 --- a/holoviews/core/accessors.py +++ b/holoviews/core/accessors.py @@ -5,13 +5,85 @@ from collections import OrderedDict from types import FunctionType +import copy import param +from param.parameterized import add_metaclass from . import util from .pprint import PrettyPrinter +class AccessorPipelineMeta(type): + def __new__(mcs, classname, bases, classdict): + if '__call__' in classdict: + classdict['__call__'] = mcs.pipelined(classdict['__call__']) + + inst = type.__new__(mcs, classname, bases, classdict) + return inst + + @classmethod + def pipelined(mcs, __call__): + def pipelined_call(*args, **kwargs): + from ..operation.element import method as method_op, factory + from .data import Dataset, MultiDimensionalMapping + inst = args[0] + if not hasattr(inst._obj, '_pipeline'): + # Wrapped object doesn't support the pipeline property + return __call__(*args, **kwargs) + + inst_pipeline = copy.copy(inst._obj._pipeline) + in_method = inst._obj._in_method + if not in_method: + inst._obj._in_method = True + + try: + result = __call__(*args, **kwargs) + + if not in_method: + init_op = factory.instance( + output_type=type(inst), + kwargs={'mode': getattr(inst, 'mode', None)}, + ) + call_op = method_op.instance( + input_type=type(inst), + method_name='__call__', + args=list(args[1:]), + kwargs=kwargs, + ) + + if isinstance(result, Dataset): + result._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + init_op, call_op + ], + output_type=type(result), + ) + elif isinstance(result, MultiDimensionalMapping): + for key, element in result.items(): + getitem_op = method_op.instance( + input_type=type(result), + method_name='__getitem__', + args=[key], + ) + element._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + init_op, call_op, getitem_op + ], + output_type=type(result), + ) + finally: + if not in_method: + inst._obj._in_method = False + + return result + + pipelined_call.__doc__ = __call__.__doc__ + + return pipelined_call + + +@add_metaclass(AccessorPipelineMeta) class Apply(object): """ Utility to apply a function or operation to all viewable elements @@ -113,7 +185,7 @@ def function(object, **kwargs): mapped.append((k, new_val)) return self._obj.clone(mapped, link=link_inputs) - + def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs): """Applies a aggregate function to all ViewableElements. @@ -150,7 +222,7 @@ def select(self, **kwargs): return self.__call__('select', **kwargs) - +@add_metaclass(AccessorPipelineMeta) class Redim(object): """ Utility that supports re-dimensioning any HoloViews object via the @@ -177,7 +249,7 @@ def replace_dimensions(cls, dimensions, overrides): list: List of dimensions with replacements applied """ from .dimension import Dimension - + replaced = [] for d in dimensions: if d.name in overrides: @@ -305,7 +377,7 @@ def values(self, specs=None, **ranges): return self._redim('values', specs, **ranges) - +@add_metaclass(AccessorPipelineMeta) class Opts(object): def __init__(self, obj, mode=None): diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index d0a5fc6ab8..441c0537ce 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -5,16 +5,21 @@ except ImportError: pass +import types +import copy import numpy as np import param +from param.parameterized import add_metaclass, ParameterizedMetaclass from .. 
import util from ..accessors import Redim -from ..dimension import Dimension, process_dimensions +from ..dimension import ( + Dimension, process_dimensions, Dimensioned, LabelledData +) from ..element import Element -from ..ndmapping import OrderedDict +from ..ndmapping import OrderedDict, MultiDimensionalMapping from ..spaces import HoloMap, DynamicMap -from .interface import Interface, iloc, ndloc, DataError +from .interface import Interface, iloc, ndloc from .array import ArrayInterface from .dictionary import DictInterface from .grid import GridInterface @@ -155,6 +160,7 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None, if len(kdims) == selected.ndims or not groupby: # Propagate dataset params['dataset'] = self._element.dataset + params['pipeline'] = self._element._pipeline element = new_type(selected, **params) return element.sort() if sort else element group = selected.groupby(groupby, container_type=HoloMap, @@ -165,7 +171,75 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None, return group +class PipelineMeta(ParameterizedMetaclass): + # Public methods that should not be wrapped + blacklist = ['__init__', 'clone'] + + def __new__(mcs, classname, bases, classdict): + + for method_name in classdict: + method_fn = classdict[method_name] + if method_name in mcs.blacklist or method_name.startswith('_'): + continue + elif isinstance(method_fn, types.FunctionType): + classdict[method_name] = mcs.pipelined(method_fn, method_name) + + inst = type.__new__(mcs, classname, bases, classdict) + return inst + + @staticmethod + def pipelined(method_fn, method_name): + def pipelined_fn(*args, **kwargs): + from ...operation.element import method as method_op + inst = args[0] + inst_pipeline = copy.copy(getattr(inst, '_pipeline', None)) + in_method = inst._in_method + if not in_method: + inst._in_method = True + + try: + result = method_fn(*args, **kwargs) + + op = method_op.instance( + input_type=type(inst), + method_name=method_name, + args=list(args[1:]), + kwargs=kwargs, + ) + + if not in_method: + if isinstance(result, Dataset): + result._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [op], + output_type=type(result), + ) + + elif isinstance(result, MultiDimensionalMapping): + for key, element in result.items(): + if isinstance(element, Dataset): + getitem_op = method_op.instance( + input_type=type(result), + method_name='__getitem__', + args=[key] + ) + element._pipeline = inst_pipeline.instance( + operations=inst_pipeline.operations + [ + op, getitem_op + ], + output_type=type(result), + ) + finally: + if not in_method: + inst._in_method = False + return result + + pipelined_fn.__doc__ = method_fn.__doc__ + + return pipelined_fn + + +@add_metaclass(PipelineMeta) class Dataset(Element): """ Dataset provides a general baseclass for Element types that @@ -201,6 +275,15 @@ class Dataset(Element): _kdim_reductions = {} def __init__(self, data, kdims=None, vdims=None, **kwargs): + from ...operation.element import ( + chain as chain_op, factory + ) + self._in_method = False + input_dataset = kwargs.pop('dataset', None) + input_pipeline = kwargs.pop( + 'pipeline', None + ) + if isinstance(data, Element): pvals = util.get_param_values(data) kwargs.update([(l, pvals[l]) for l in ['group', 'label'] @@ -217,6 +300,50 @@ def __init__(self, data, kdims=None, vdims=None, **kwargs): self.redim = Redim(self, mode='dataset') + # Handle _pipeline property + if input_pipeline is None: + input_pipeline = chain_op.instance() + + init_op = 
factory.instance( + output_type=type(self), + args=[], + kwargs=kwargs, + ) + self._pipeline = input_pipeline.instance( + operations=input_pipeline.operations + [init_op], + output_type=type(self), + ) + + # Handle initializing the dataset property. + self._dataset = None + if input_dataset is not None: + self._dataset = input_dataset.clone(dataset=None, pipeline=None) + + elif type(self) is Dataset: + self._dataset = self + + @property + def dataset(self): + """ + The Dataset that this object was created from. + """ + from . import Dataset + if self._dataset is None: + dataset = Dataset(self, _validate_vdims=False) + if hasattr(self, '_binned'): + dataset._binned = self._binned + return dataset + else: + return self._dataset + + @property + def pipeline(self): + """ + Chain operation that evaluates the sequence of operations that was + used to create this object, starting with the Dataset stored in the + dataset property. + """ + return self._pipeline def closest(self, coords=[], **kwargs): """Snaps coordinate(s) to closest coordinate in Dataset @@ -880,23 +1007,38 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): datatypes = [self.interface.datatype] + self.datatype overrides['datatype'] = list(util.unique_iterator(datatypes)) - if 'dataset' in overrides: - dataset = overrides.pop('dataset') - else: - dataset = self.dataset + if data is None: + overrides['_validate_vdims'] = False - new_dataset = super(Dataset, self).clone(data, shared_data, new_type, *args, **overrides) + if 'dataset' not in overrides: + overrides['dataset'] = self.dataset - if dataset is not None: - try: - new_dataset._dataset = dataset.clone(data=new_dataset.data, dataset=None) - except DataError: - # New dataset doesn't have the necessary dimensions to - # propagate dataset. Do nothing - pass + if 'pipeline' not in overrides: + overrides['pipeline'] = self._pipeline + elif self._in_method: + if 'dataset' not in overrides: + overrides['dataset'] = self.dataset + + new_dataset = super(Dataset, self).clone( + data, shared_data, new_type, *args, **overrides + ) return new_dataset + # Overrides of superclass methods that are needed so that PipelineMeta + # will find them to wrap with pipeline support + def options(self, *args, **kwargs): + return super(Dataset, self).options(*args, **kwargs) + options.__doc__ = Dimensioned.options.__doc__ + + def map(self, *args, **kwargs): + return super(Dataset, self).map(*args, **kwargs) + map.__doc__ = LabelledData.map.__doc__ + + def relabel(self, *args, **kwargs): + return super(Dataset, self).relabel(*args, **kwargs) + relabel.__doc__ = LabelledData.relabel.__doc__ + @property def iloc(self): """Returns iloc indexer with support for columnar indexing. 
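# ---------------------------------------------------------------------
# Editor's note: a minimal, hypothetical usage sketch of the pipeline
# machinery added above; it is not part of the patch. It assumes the
# Dataset.dataset / Dataset.pipeline API introduced in this diff, and
# the sample data and variable names are made up. It mirrors the
# pattern the new tests exercise: every constructor and wrapped method
# call is recorded, and the recorded chain can be replayed on other data.
import numpy as np
from holoviews import Curve, Dataset

ds = Dataset({'a': np.arange(10), 'b': np.arange(10) ** 2},
             kdims=['a'], vdims=['b'])
curve = ds.to(Curve, 'a', 'b', groupby=[]).select(b=(0, 50))

# Replaying the pipeline on the element's own .dataset reconstructs an
# element equal to curve (the tests below verify this with assertEqual).
replayed = curve.pipeline(curve.dataset)

# Applying the same pipeline to a second Dataset with matching
# dimensions re-runs Dataset -> Curve -> select on the new data.
ds2 = Dataset({'a': np.arange(5), 'b': np.arange(5) ** 3},
              kdims=['a'], vdims=['b'])
curve2 = curve.pipeline(ds2)
# ---------------------------------------------------------------------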
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index c97b2858c0..7ddf63454f 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -40,7 +40,40 @@ def __init__(self, msg, interface=None): super(DataError, self).__init__(msg) -class iloc(object): +class Accessor(object): + def __init__(self, dataset): + self.dataset = dataset + + def __getitem__(self, index): + from ..data import Dataset + from ...operation.element import method + in_method = self.dataset._in_method + if not in_method: + self.dataset._in_method = True + try: + res = self._perform_getitem(self.dataset, index) + if not in_method and isinstance(res, Dataset): + getitem_op = method.instance( + input_type=type(self), + output_type=type(self.dataset), + method_name='_perform_getitem', + args=[index], + ) + res._pipeline = self.dataset.pipeline.instance( + operations=self.dataset.pipeline.operations + [getitem_op], + output_type=type(self.dataset) + ) + finally: + if not in_method: + self.dataset._in_method = False + return res + + @classmethod + def _perform_getitem(cls, dataset, index): + raise NotImplementedError() + + +class iloc(Accessor): """ iloc is small wrapper object that allows row, column based indexing into a Dataset using the ``.iloc`` property. It supports @@ -48,11 +81,8 @@ class iloc(object): integer indices, slices, lists and arrays of values. For more information see the ``Dataset.iloc`` property docstring. """ - - def __init__(self, dataset): - self.dataset = dataset - - def __getitem__(self, index): + @classmethod + def _perform_getitem(cls, dataset, index): index = util.wrap_tuple(index) if len(index) == 1: index = (index[0], slice(None)) @@ -63,32 +93,32 @@ def __getitem__(self, index): rows, cols = index if rows is Ellipsis: rows = slice(None) - data = self.dataset.interface.iloc(self.dataset.dataset, (rows, cols)) - kdims = self.dataset.kdims - vdims = self.dataset.vdims + data = dataset.interface.iloc(dataset.dataset, (rows, cols)) + kdims = dataset.kdims + vdims = dataset.vdims if np.isscalar(data): return data elif cols == slice(None): pass else: if isinstance(cols, slice): - dims = self.dataset.dimensions()[index[1]] + dims = dataset.dimensions()[index[1]] elif np.isscalar(cols): - dims = [self.dataset.get_dimension(cols)] + dims = [dataset.get_dimension(cols)] else: - dims = [self.dataset.get_dimension(d) for d in cols] + dims = [dataset.get_dimension(d) for d in cols] kdims = [d for d in dims if d in kdims] vdims = [d for d in dims if d in vdims] - datatype = [dt for dt in self.dataset.datatype + datatype = [dt for dt in dataset.datatype if dt in Interface.interfaces and not Interface.interfaces[dt].gridded] if not datatype: datatype = ['dataframe', 'dictionary'] - return self.dataset.clone(data, kdims=kdims, vdims=vdims, - datatype=datatype) + return dataset.clone(data, kdims=kdims, vdims=vdims, + datatype=datatype) -class ndloc(object): +class ndloc(Accessor): """ ndloc is a small wrapper object that allows ndarray-like indexing for gridded Datasets using the ``.ndloc`` property. It supports @@ -96,22 +126,19 @@ class ndloc(object): integer indices, slices, lists and arrays of values. For more information see the ``Dataset.ndloc`` property docstring. 
""" - - def __init__(self, dataset): - self.dataset = dataset - - def __getitem__(self, indices): - ds = self.dataset + @classmethod + def _perform_getitem(cls, dataset, indices): + ds = dataset indices = util.wrap_tuple(indices) if not ds.interface.gridded: raise IndexError('Cannot use ndloc on non nd-dimensional datastructure') - selected = self.dataset.interface.ndloc(ds, indices) + selected = dataset.interface.ndloc(ds, indices) if np.isscalar(selected): return selected params = {} if hasattr(ds, 'bounds'): params['bounds'] = None - return self.dataset.clone(selected, datatype=[ds.interface.datatype]+ds.datatype, **params) + return dataset.clone(selected, datatype=[ds.interface.datatype]+ds.datatype, **params) class Interface(param.Parameterized): diff --git a/holoviews/core/data/multipath.py b/holoviews/core/data/multipath.py index b0ff823a29..fa4053683c 100644 --- a/holoviews/core/data/multipath.py +++ b/holoviews/core/data/multipath.py @@ -59,7 +59,7 @@ def validate(cls, dataset, vdims=True): return from holoviews.element import Polygons - ds = cls._inner_dataset_template(dataset) + ds = cls._inner_dataset_template(dataset, validate_vdims=vdims) for d in dataset.data: ds.data = d ds.interface.validate(ds, vdims) @@ -76,7 +76,7 @@ def validate(cls, dataset, vdims=True): @classmethod - def _inner_dataset_template(cls, dataset): + def _inner_dataset_template(cls, dataset, validate_vdims=True): """ Returns a Dataset template used as a wrapper around the data contained within the multi-interface dataset. @@ -84,7 +84,8 @@ def _inner_dataset_template(cls, dataset): from . import Dataset vdims = dataset.vdims if getattr(dataset, 'level', None) is None else [] return Dataset(dataset.data[0], datatype=cls.subtypes, - kdims=dataset.kdims, vdims=vdims) + kdims=dataset.kdims, vdims=vdims, + _validate_vdims=validate_vdims) @classmethod def dimension_type(cls, dataset, dim): diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 70415f151f..887d3575e7 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -486,41 +486,8 @@ def __init__(self, data, id=None, plot_id=None, **params): This class also has an id instance attribute, which may be set to associate some custom options with the object. """ - from . import Dataset, DataError self.data = data - # Handle initializing the dataset property. - self._dataset = None - input_dataset = params.pop('dataset', None) - if type(self) is Dataset: - self._dataset = self - elif input_dataset is not None: - # Clone dimension info from input dataset with reference to new - # data. This way we keep the metadata for all of the dimensions. - try: - self._dataset = input_dataset.clone(data=self.data) - except DataError: - # Dataset not compatible with input data - pass - if self._dataset is None: - # Create a default Dataset to wrap input data - try: - kdims = list(params.get('kdims', [])) - vdims = list(params.get('vdims', [])) - dims = kdims + vdims - dataset = Dataset( - self.data, - kdims=dims if dims else None - ) - if len(dataset.dimensions()) == 0: - # No dimensions could be auto-detected in data - raise DataError("No dimensions detected") - self._dataset = dataset - except DataError: - # Data not supported by any storage backend. leave _dataset as - # None - pass - self._id = None self.id = id self._plot_id = plot_id or util.builtins.id(self) @@ -542,10 +509,6 @@ def __init__(self, data, id=None, plot_id=None, **params): raise ValueError("Supplied label %r contains invalid characters." 
% self.label) - @property - def dataset(self): - return self._dataset - @property def id(self): return self._id diff --git a/holoviews/core/operation.py b/holoviews/core/operation.py index 85902fff84..ddccd6d947 100644 --- a/holoviews/core/operation.py +++ b/holoviews/core/operation.py @@ -8,7 +8,7 @@ from .layout import Layout from .overlay import NdOverlay, Overlay from .spaces import Callable, HoloMap -from . import util +from . import util, Dataset class Operation(param.ParameterizedFunction): @@ -118,9 +118,20 @@ def _apply(self, element, key=None): kwargs = {} for hook in self._preprocess_hooks: kwargs.update(hook(self, element)) + + element_pipeline = getattr(element, '_pipeline', None) + ret = self._process(element, key) for hook in self._postprocess_hooks: ret = hook(self, ret, **kwargs) + + if isinstance(ret, Dataset) and isinstance(element, Dataset): + ret._dataset = element.dataset.clone() + ret._pipeline = element_pipeline.instance( + operations=element_pipeline.operations + [ + self.instance(**self.p) + ], + ) return ret diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 2b36f33aa4..662653c8c5 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -1,5 +1,4 @@ import numpy as np -import copy import param from ..streams import BoundsXY @@ -225,61 +224,8 @@ def __init__(self, data, edges=None, **params): elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]): data = data[::-1] - self._operation_kwargs = params.pop('_operation_kwargs', None) - - dataset = params.pop("dataset", None) super(Histogram, self).__init__(data, **params) - if dataset: - # Histogram is a special case in which we keep the data from the - # input dataset rather than replace it with the element data. - # This is so that dataset contains the data needed to reconstruct - # the element. - self._dataset = dataset.clone() - - def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): - if 'dataset' in overrides: - dataset = overrides.pop('dataset', None) - else: - dataset = self.dataset - - overrides["dataset"] = None - - new_element = super(Histogram, self).clone( - data=data, - shared_data=shared_data, - new_type=new_type, - _operation_kwargs=copy.deepcopy(self._operation_kwargs), - *args, - **overrides - ) - - if dataset: - # Histogram is a special case in which we keep the data from the - # input dataset rather than replace it with the element data. - # This is so that dataset contains the data needed to reconstruct - # the element. 
- new_element._dataset = dataset.clone() - - return new_element - - def select(self, selection_specs=None, **selection): - selected = super(Histogram, self).select( - selection_specs=selection_specs, **selection - ) - - if not np.isscalar(selected) and not np.array_equal(selected.data, self.data): - # Selection changed histogram bins, so update dataset - selection = { - dim: sel for dim, sel in selection.items() - if dim in self.dimensions()+['selection_mask'] - } - - if selected._dataset is not None: - selected._dataset = self.dataset.select(**selection) - - return selected - def _get_selection_expr_for_stream_value(self, **kwargs): from ..util.transform import dim diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index c785b23684..dd9ad05934 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -84,8 +84,38 @@ class factory(Operation): By default, if three overlaid Images elements are supplied, the corresponding RGB element will be returned. """) + args = param.List(default=[], doc=""" + The list of positional arguments to pass to the factory""") + + kwargs = param.Dict(default={}, doc=""" + The dict of keyword arguments to pass to the factory""") + def _process(self, view, key=None): - return self.p.output_type(view) + return self.p.output_type(view, *self.p.args, **self.p.kwargs) + + +class method(Operation): + """ + Operation that wraps a method call + """ + output_type = param.ClassSelector(class_=type, doc=""" + The output type of the method operation""") + + input_type = param.ClassSelector(class_=type, doc=""" + The object type the method is defined on""") + + method_name = param.String(default='__call__', doc=""" + The method name""") + + args = param.List(default=[], doc=""" + The list of positional arguments to pass to the method""") + + kwargs = param.Dict(default={}, doc=""" + The dict of keyword arguments to pass to the method""") + + def _process(self, element, key=None): + fn = getattr(self.p.input_type, self.p.method_name) + return fn(element, *self.p.args, **self.p.kwargs) class chain(Operation): @@ -110,9 +140,9 @@ class chain(Operation): The output type of the chain operation. Must be supplied if the chain is to be used as a channel operation.""") - group = param.String(default='Chain', doc=""" - The group assigned to the result after having applied the chain.""") - + group = param.String(default='', doc=""" + The group assigned to the result after having applied the chain. + Defaults to the group produced by the last operation in the chain.""") operations = param.List(default=[], class_=Operation, doc=""" A list of Operations (or Operation instances) @@ -124,7 +154,10 @@ def _process(self, view, key=None): processed = operation.process_element(processed, key, input_ranges=self.p.input_ranges) - return processed.clone(group=self.p.group) + if not self.p.group: + return processed + else: + return processed.clone(group=self.p.group) class transform(Operation): @@ -163,7 +196,6 @@ def _process(self, img, key=None): return img.clone(processed, group=self.p.group) - class image_overlay(Operation): """ Operation to build a overlay of images to a specification from a @@ -656,19 +688,12 @@ def _process(self, element, key=None): if self.p.normed in (True, 'integral'): hist *= edges[1]-edges[0] - # Save off the kwargs needed to reproduce this Histogram later. - # We remove the properties that are used as instructions for how to - # calculate the bins, and replace those with the explicit list of bin - # edges. 
This way, not only can we regenerate this exact histogram - # from the same data set, but we can also generate a histogram using - # a different dataset that will share the exact same bins. - exclusions = {'log', 'bin_range', 'num_bins'} - params['_operation_kwargs'] = { - k: v for k, v in self.p.items() if k not in exclusions - } - params['_operation_kwargs']['bins'] = list(edges) + # Save off the computed bin edges so that if this operation instance + # is used to compute another histogram, it will default to the same + # bin edges. + self.bins = list(edges) return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)], - label=element.label, dataset=element.dataset, **params) + label=element.label, **params) class decimate(Operation): diff --git a/holoviews/operation/timeseries.py b/holoviews/operation/timeseries.py index 6f1246d782..d9d2018cc1 100644 --- a/holoviews/operation/timeseries.py +++ b/holoviews/operation/timeseries.py @@ -17,7 +17,7 @@ class RollingBase(param.Parameterized): Whether to set the x-coordinate at the center or right edge of the window.""") - min_periods = param.Integer(default=None, doc=""" + min_periods = param.Integer(default=None, allow_None=True, doc=""" Minimum number of observations in window required to have a value (otherwise result is NaN).""") @@ -35,7 +35,7 @@ class rolling(Operation,RollingBase): Applies a function over a rolling window. """ - window_type = param.ObjectSelector(default=None, + window_type = param.ObjectSelector(default=None, allow_None=True, objects=['boxcar', 'triang', 'blackman', 'hamming', 'bartlett', 'parzen', 'bohman', 'blackmanharris', 'nuttall', 'barthann', 'kaiser', 'gaussian', 'general_gaussian', @@ -72,7 +72,7 @@ class resample(Operation): """ closed = param.ObjectSelector(default=None, objects=['left', 'right'], - doc="Which side of bin interval is closed") + doc="Which side of bin interval is closed", allow_None=True) function = param.Callable(default=np.mean, doc=""" Function for computing new values out of existing ones.""") diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py index 2a00673a73..08f3707694 100644 --- a/holoviews/plotting/bokeh/stats.py +++ b/holoviews/plotting/bokeh/stats.py @@ -338,7 +338,7 @@ def _kde_data(self, el, key, **kwargs): if self.clip: vdim = vdim(range=self.clip) el = el.clone(vdims=[vdim]) - kde = univariate_kde(el, dimension=vdim, **kwargs) + kde = univariate_kde(el, dimension=vdim.name, **kwargs) xs, ys = (kde.dimension_values(i) for i in range(2)) mask = isfinite(ys) & (ys>0) # Mask out non-finite and zero values xs, ys = xs[mask], ys[mask] diff --git a/holoviews/tests/core/testdatasetproperty.py b/holoviews/tests/core/testdatasetproperty.py index 6296fe75ea..6cb9e70095 100644 --- a/holoviews/tests/core/testdatasetproperty.py +++ b/holoviews/tests/core/testdatasetproperty.py @@ -1,7 +1,12 @@ +from holoviews.core import Apply, Redim from holoviews.element.comparison import ComparisonTestCase import pandas as pd from holoviews import Dataset, Curve, Dimension, Scatter, Distribution +from holoviews.operation import histogram +from holoviews.operation.datashader import dynspread, datashade, rasterize import dask.dataframe as dd +import numpy as np + class DatasetPropertyTestCase(ComparisonTestCase): @@ -23,36 +28,82 @@ def setUp(self): ] ) + self.ds2 = Dataset( + self.df.iloc[2:], + kdims=[ + Dimension('a', label="The a Column"), + Dimension('b', label="The b Column"), + Dimension('c', label="The c Column"), + Dimension('d', label="The d Column"), 
+ ] + ) + class ConstructorTestCase(DatasetPropertyTestCase): def test_constructors_dataset(self): - expected = Dataset(self.df) - self.assertIs(expected, expected.dataset) + ds = Dataset(self.df) + self.assertIs(ds, ds.dataset) + + # Check pipeline + ops = ds.pipeline.operations + self.assertEqual(len(ops), 1) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ds, ds.pipeline(ds.dataset)) def test_constructor_curve(self): element = Curve(self.df) - expected = Dataset(self.df) + expected = Dataset( + self.df, + kdims=self.df.columns[0], + vdims=self.df.columns[1:].tolist(), + ) self.assertEqual(element.dataset, expected) + # Check pipeline + pipeline = element.pipeline + self.assertEqual(len(pipeline.operations), 1) + self.assertIs(pipeline.operations[0].output_type, Curve) + self.assertEqual(element, element.pipeline(element.dataset)) + class ToTestCase(DatasetPropertyTestCase): def test_to_element(self): curve = self.ds.to(Curve, 'a', 'b', groupby=[]) + curve2 = self.ds2.to(Curve, 'a', 'b', groupby=[]) + self.assertNotEqual(curve, curve2) + self.assertEqual(curve.dataset, self.ds) scatter = curve.to(Scatter) self.assertEqual(scatter.dataset, self.ds) + # Check pipeline + ops = curve.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + + # Execute pipeline + self.assertEqual(curve.pipeline(curve.dataset), curve) + self.assertEqual( + curve.pipeline(self.ds2), curve2 + ) + def test_to_holomap(self): curve_hmap = self.ds.to(Curve, 'a', 'b', groupby=['c']) # Check HoloMap element datasets for v in self.df.c.drop_duplicates(): curve = curve_hmap.data[(v,)] + + # check dataset self.assertEqual( - curve.dataset, self.ds.select(c=v) + curve.dataset, self.ds ) + # execute pipeline + self.assertEqual(curve.pipeline(curve.dataset), curve) + def test_to_holomap_dask(self): ddf = dd.from_pandas(self.df, npartitions=2) dds = Dataset( @@ -71,9 +122,12 @@ def test_to_holomap_dask(self): for v in self.df.c.drop_duplicates(): curve = curve_hmap.data[(v,)] self.assertEqual( - curve.dataset, self.ds.select(c=v) + curve.dataset, self.ds ) + # Execute pipeline + self.assertEqual(curve.pipeline(curve.dataset), curve) + class CloneTestCase(DatasetPropertyTestCase): def test_clone(self): @@ -81,73 +135,479 @@ def test_clone(self): self.assertEqual(self.ds.clone().dataset, self.ds) # Curve + curve = self.ds.to.curve('a', 'b', groupby=[]) + curve_clone = curve.clone() self.assertEqual( - self.ds.to.curve('a', 'b', groupby=[]).clone().dataset, + curve_clone.dataset, self.ds ) + # Check pipeline carried over + self.assertEqual( + curve.pipeline.operations, curve_clone.pipeline.operations[:2] + ) + + # Execute pipeline + self.assertEqual(curve.pipeline(curve.dataset), curve) + + def test_clone_new_data(self): + # Replacing data during clone resets .dataset + ds_clone = self.ds.clone(data=self.ds2.data) + self.assertEqual(ds_clone.dataset, self.ds2) + self.assertEqual(len(ds_clone.pipeline.operations), 1) + class ReindexTestCase(DatasetPropertyTestCase): def test_reindex_dataset(self): ds_ab = self.ds.reindex(kdims=['a'], vdims=['b']) + ds2_ab = self.ds2.reindex(kdims=['a'], vdims=['b']) + self.assertNotEqual(ds_ab, ds2_ab) + self.assertEqual(ds_ab.dataset, self.ds) + # Check pipeline + ops = ds_ab.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[1].args, []) + self.assertEqual(ops[1].kwargs, 
dict(kdims=['a'], vdims=['b'])) + + # Execute pipeline + self.assertEqual(ds_ab.pipeline(ds_ab.dataset), ds_ab) + self.assertEqual( + ds_ab.pipeline(self.ds2), ds2_ab + ) + def test_double_reindex_dataset(self): - ds_abc = self.ds.reindex(kdims=['a'], vdims=['b', 'c']) - ds_ab = ds_abc.reindex(kdims=['a'], vdims=['b']) + ds_ab = (self.ds + .reindex(kdims=['a'], vdims=['b', 'c']) + .reindex(kdims=['a'], vdims=['b'])) + ds2_ab = (self.ds2 + .reindex(kdims=['a'], vdims=['b', 'c']) + .reindex(kdims=['a'], vdims=['b'])) + self.assertNotEqual(ds_ab, ds2_ab) + self.assertEqual(ds_ab.dataset, self.ds) + # Check pipeline + ops = ds_ab.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[1].args, []) + self.assertEqual(ops[1].kwargs, dict(kdims=['a'], vdims=['b', 'c'])) + self.assertEqual(ops[2].method_name, 'reindex') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, dict(kdims=['a'], vdims=['b'])) + + # Execute pipeline + self.assertEqual(ds_ab.pipeline(ds_ab.dataset), ds_ab) + self.assertEqual( + ds_ab.pipeline(self.ds2), ds2_ab + ) + def test_reindex_curve(self): - curve_ab = self.ds.to(Curve, 'a', 'b', groupby=[]) - curve_ba = curve_ab.reindex(kdims='b', vdims='a') - self.assertEqual(curve_ab.dataset, self.ds) + curve_ba = self.ds.to( + Curve, 'a', 'b', groupby=[] + ).reindex(kdims='b', vdims='a') + curve2_ba = self.ds2.to( + Curve, 'a', 'b', groupby=[] + ).reindex(kdims='b', vdims='a') + self.assertNotEqual(curve_ba, curve2_ba) + self.assertEqual(curve_ba.dataset, self.ds) + # Check pipeline + ops = curve_ba.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'reindex') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, dict(kdims='b', vdims='a')) + + # Execute pipeline + self.assertEqual(curve_ba.pipeline(curve_ba.dataset), curve_ba) + self.assertEqual( + curve_ba.pipeline(self.ds2), curve2_ba + ) + def test_double_reindex_curve(self): - curve_abc = self.ds.to(Curve, 'a', ['b', 'c'], groupby=[]) - curve_ab = curve_abc.reindex(kdims='a', vdims='b') - curve_ba = curve_ab.reindex(kdims='b', vdims='a') - self.assertEqual(curve_ab.dataset, self.ds) + curve_ba = self.ds.to( + Curve, 'a', ['b', 'c'], groupby=[] + ).reindex(kdims='a', vdims='b').reindex(kdims='b', vdims='a') + curve2_ba = self.ds2.to( + Curve, 'a', ['b', 'c'], groupby=[] + ).reindex(kdims='a', vdims='b').reindex(kdims='b', vdims='a') + self.assertNotEqual(curve_ba, curve2_ba) + self.assertEqual(curve_ba.dataset, self.ds) + # Check pipeline + ops = curve_ba.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'reindex') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, dict(kdims='a', vdims='b')) + self.assertEqual(ops[3].method_name, 'reindex') + self.assertEqual(ops[3].args, []) + self.assertEqual(ops[3].kwargs, dict(kdims='b', vdims='a')) + + # Execute pipeline + self.assertEqual(curve_ba.pipeline(curve_ba.dataset), curve_ba) + self.assertEqual( + curve_ba.pipeline(self.ds2), curve2_ba + ) + class IlocTestCase(DatasetPropertyTestCase): def test_iloc_dataset(self): - expected = self.ds.iloc[[0, 2]] + ds_iloc = self.ds.iloc[[0, 2]] + ds2_iloc = self.ds2.iloc[[0, 2]] + self.assertNotEqual(ds_iloc, ds2_iloc) # 
Dataset self.assertEqual( - self.ds.clone().iloc[[0, 2]].dataset, - expected + ds_iloc.dataset, + self.ds ) - def test_iloc_curve(self): - expected = self.ds.iloc[[0, 2]] + # Check pipeline + ops = ds_iloc.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, '_perform_getitem') + self.assertEqual(ops[1].args, [[0, 2]]) + self.assertEqual(ops[1].kwargs, {}) + + # Execute pipeline + self.assertEqual(ds_iloc.pipeline(ds_iloc.dataset), ds_iloc) + self.assertEqual( + ds_iloc.pipeline(self.ds2), ds2_iloc + ) + def test_iloc_curve(self): # Curve - curve = self.ds.to.curve('a', 'b', groupby=[]) + curve_iloc = self.ds.to.curve('a', 'b', groupby=[]).iloc[[0, 2]] + curve2_iloc = self.ds2.to.curve('a', 'b', groupby=[]).iloc[[0, 2]] + self.assertNotEqual(curve_iloc, curve2_iloc) + + self.assertEqual( + curve_iloc.dataset, + self.ds + ) + + # Check pipeline + ops = curve_iloc.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, '_perform_getitem') + self.assertEqual(ops[2].args, [[0, 2]]) + self.assertEqual(ops[2].kwargs, {}) + + # Execute pipeline + self.assertEqual(curve_iloc.pipeline(curve_iloc.dataset), curve_iloc) + self.assertEqual( + curve_iloc.pipeline(self.ds2), curve2_iloc + ) + + +class NdlocTestCase(DatasetPropertyTestCase): + def setUp(self): + super(NdlocTestCase, self).setUp() + self.ds_grid = Dataset( + (np.arange(4), + np.arange(3), + np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12]])), + kdims=['x', 'y'], + vdims='z' + ) + + self.ds2_grid = Dataset( + (np.arange(3), + np.arange(3), + np.array([[1, 2, 4], + [5, 6, 8], + [9, 10, 12]])), + kdims=['x', 'y'], + vdims='z' + ) + + def test_ndloc_dataset(self): + ds_grid_ndloc = self.ds_grid.ndloc[0:2, 1:3] + ds2_grid_ndloc = self.ds2_grid.ndloc[0:2, 1:3] + self.assertNotEqual(ds_grid_ndloc, ds2_grid_ndloc) + + # Dataset + self.assertEqual( + ds_grid_ndloc.dataset, + self.ds_grid + ) + + # Check pipeline + ops = ds_grid_ndloc.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, '_perform_getitem') + self.assertEqual( + ops[1].args, [(slice(0, 2, None), slice(1, 3, None))] + ) + self.assertEqual(ops[1].kwargs, {}) + + # Execute pipeline + self.assertEqual( + ds_grid_ndloc.pipeline(ds_grid_ndloc.dataset), ds_grid_ndloc + ) self.assertEqual( - curve.iloc[[0, 2]].dataset, - expected + ds_grid_ndloc.pipeline(self.ds2_grid), ds2_grid_ndloc ) class SelectTestCase(DatasetPropertyTestCase): def test_select_dataset(self): + ds_select = self.ds.select(b=10) + ds2_select = self.ds2.select(b=10) + self.assertNotEqual(ds_select, ds2_select) + + # Dataset self.assertEqual( - self.ds.select(b=10).dataset, - self.ds.select(b=10) + ds_select.dataset, + self.ds + ) + + # Check pipeline + ops = ds_select.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'select') + self.assertEqual(ops[1].args, []) + self.assertEqual(ops[1].kwargs, {'b': 10}) + + # Execute pipeline + self.assertEqual(ds_select.pipeline(ds_select.dataset), ds_select) + self.assertEqual( + ds_select.pipeline(self.ds2), ds2_select ) def test_select_curve(self): + curve_select = self.ds.to.curve('a', 'b', groupby=[]).select(b=10) + curve2_select = self.ds2.to.curve('a', 'b', groupby=[]).select(b=10) + 
self.assertNotEqual(curve_select, curve2_select) + + # Curve + self.assertEqual( + curve_select.dataset, + self.ds + ) + + # Check pipeline + ops = curve_select.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'select') + self.assertEqual(ops[2].args, []) + self.assertEqual(ops[2].kwargs, {'b': 10}) + + # Execute pipeline + self.assertEqual( + curve_select.pipeline(curve_select.dataset), curve_select + ) + self.assertEqual( + curve_select.pipeline(self.ds2), curve2_select + ) + + +class SortTestCase(DatasetPropertyTestCase): + def test_sort_curve(self): + curve_sorted = self.ds.to.curve('a', 'b', groupby=[]).sort('a') + curve_sorted2 = self.ds2.to.curve('a', 'b', groupby=[]).sort('a') + self.assertNotEqual(curve_sorted, curve_sorted2) + # Curve self.assertEqual( - self.ds.to.curve('a', 'b', groupby=[]).select(b=10).dataset, - self.ds.select(b=10) + curve_sorted.dataset, + self.ds + ) + + # Check pipeline + ops = curve_sorted.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'sort') + self.assertEqual(ops[2].args, ['a']) + self.assertEqual(ops[2].kwargs, {}) + + # Execute pipeline + self.assertEqual( + curve_sorted.pipeline(curve_sorted.dataset), curve_sorted ) + self.assertEqual( + curve_sorted.pipeline(self.ds2), curve_sorted2 + ) + + +class SampleTestCase(DatasetPropertyTestCase): + def test_sample_curve(self): + curve_sampled = self.ds.to.curve('a', 'b', groupby=[]).sample([1, 2]) + curve_sampled2 = self.ds2.to.curve('a', 'b', groupby=[]).sample([1, 2]) + self.assertNotEqual(curve_sampled, curve_sampled2) + + # Curve + self.assertEqual( + curve_sampled.dataset, + self.ds + ) + + # Check pipeline + ops = curve_sampled.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertEqual(ops[2].method_name, 'sample') + self.assertEqual(ops[2].args, [[1, 2]]) + self.assertEqual(ops[2].kwargs, {}) + + # Execute pipeline + self.assertEqual( + curve_sampled.pipeline(curve_sampled.dataset), curve_sampled + ) + self.assertEqual( + curve_sampled.pipeline(self.ds2), curve_sampled2 + ) + + +class ReduceTestCase(DatasetPropertyTestCase): + def test_reduce_dataset(self): + ds_reduced = self.ds.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).reduce('c', function=np.sum) + + ds2_reduced = self.ds2.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).reduce('c', function=np.sum) + + self.assertNotEqual(ds_reduced, ds2_reduced) + self.assertEqual(ds_reduced.dataset, self.ds) + self.assertEqual(ds2_reduced.dataset, self.ds2) + + # Check pipeline + ops = ds_reduced.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[2].method_name, 'reduce') + self.assertEqual(ops[2].args, ['c']) + self.assertEqual(ops[2].kwargs, {'function': np.sum}) + + # Execute pipeline + self.assertEqual(ds_reduced.pipeline(ds_reduced.dataset), ds_reduced) + self.assertEqual( + ds_reduced.pipeline(self.ds2), ds2_reduced + ) + + +class AggregateTestCase(DatasetPropertyTestCase): + def test_aggregate_dataset(self): + ds_aggregated = self.ds.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).aggregate('b', function=np.sum) + + ds2_aggregated = self.ds2.reindex( + kdims=['b', 'c'], 
vdims=['a', 'd'] + ).aggregate('b', function=np.sum) + + self.assertNotEqual(ds_aggregated, ds2_aggregated) + self.assertEqual(ds_aggregated.dataset, self.ds) + self.assertEqual(ds2_aggregated.dataset, self.ds2) + # Check pipeline + ops = ds_aggregated.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[2].method_name, 'aggregate') + self.assertEqual(ops[2].args, ['b']) + self.assertEqual(ops[2].kwargs, {'function': np.sum}) + # Execute pipeline + self.assertEqual( + ds_aggregated.pipeline(ds_aggregated.dataset), ds_aggregated + ) + self.assertEqual( + ds_aggregated.pipeline(self.ds2), ds2_aggregated + ) + + +class GroupbyTestCase(DatasetPropertyTestCase): + def test_groupby_dataset(self): + ds_groups = self.ds.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).groupby('b') + + ds2_groups = self.ds2.reindex( + kdims=['b', 'c'], vdims=['a', 'd'] + ).groupby('b') + + self.assertNotEqual(ds_groups, ds2_groups) + for k in ds_groups.keys(): + ds_group = ds_groups[k] + ds2_group = ds2_groups[k] + + # Check pipeline + ops = ds_group.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'reindex') + self.assertEqual(ops[2].method_name, 'groupby') + self.assertEqual(ops[2].args, ['b']) + self.assertEqual(ops[3].method_name, '__getitem__') + self.assertEqual(ops[3].args, [k]) + + # Execute pipeline + self.assertEqual(ds_group.pipeline(ds_group.dataset), ds_group) + self.assertEqual( + ds_group.pipeline(self.ds2), ds2_group + ) + + +class AddDimensionTestCase(DatasetPropertyTestCase): + def test_add_dimension_dataset(self): + ds_dim_added = self.ds.add_dimension('new', 1, 17) + ds2_dim_added = self.ds2.add_dimension('new', 1, 17) + self.assertNotEqual(ds_dim_added, ds2_dim_added) + + # Check dataset + self.assertEqual(ds_dim_added.dataset, self.ds) + self.assertEqual(ds2_dim_added.dataset, self.ds2) + + # Check pipeline + ops = ds_dim_added.pipeline.operations + self.assertEqual(len(ops), 2) + self.assertIs(ops[0].output_type, Dataset) + self.assertEqual(ops[1].method_name, 'add_dimension') + self.assertEqual(ops[1].args, ['new', 1, 17]) + self.assertEqual(ops[1].kwargs, {}) + + # Execute pipeline + self.assertEqual( + ds_dim_added.pipeline(ds_dim_added.dataset), ds_dim_added + ) + self.assertEqual( + ds_dim_added.pipeline(self.ds2), ds2_dim_added, + ) + + +# Add execute pipeline test for each method, using a different dataset (ds2) +# class HistogramTestCase(DatasetPropertyTestCase): def setUp(self): @@ -162,7 +622,21 @@ def test_clone(self): def test_select_single(self): sub_hist = self.hist.select(a=(1, None)) - self.assertEqual(sub_hist.dataset, self.ds.select(a=(1, None))) + self.assertEqual(sub_hist.dataset, self.ds) + + # Check pipeline + ops = sub_hist.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Apply) + self.assertEqual(ops[2].method_name, '__call__') + self.assertIsInstance(ops[2].args[0], histogram) + self.assertEqual(ops[3].method_name, 'select') + self.assertEqual(ops[3].args, []) + self.assertEqual(ops[3].kwargs, {'a': (1, None)}) + + # Execute pipeline + self.assertEqual(sub_hist.pipeline(sub_hist.dataset), sub_hist) def test_select_multi(self): # Add second selection on b. 
b is a dimension in hist.dataset but @@ -175,14 +649,41 @@ def test_select_multi(self): self.ds.select(a=(1, None), b=100) ) + # Check dataset unchanged self.assertEqual( sub_hist.dataset, - self.ds.select(a=(1, None)) + self.ds ) + # Check pipeline + ops = sub_hist.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Apply) + self.assertEqual(ops[2].method_name, '__call__') + self.assertIsInstance(ops[2].args[0], histogram) + self.assertEqual(ops[3].method_name, 'select') + self.assertEqual(ops[3].args, []) + self.assertEqual(ops[3].kwargs, {'a': (1, None), 'b': 100}) + + # Execute pipeline + self.assertEqual(sub_hist.pipeline(sub_hist.dataset), sub_hist) + def test_hist_to_curve(self): # No exception thrown - self.hist.to.curve() + curve = self.hist.to.curve() + + # Check pipeline + ops = curve.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Apply) + self.assertEqual(ops[2].method_name, '__call__') + self.assertIsInstance(ops[2].args[0], histogram) + self.assertIs(ops[3].output_type, Curve) + + # Execute pipeline + self.assertEqual(curve.pipeline(curve.dataset), curve) class DistributionTestCase(DatasetPropertyTestCase): @@ -193,3 +694,110 @@ def setUp(self): def test_distribution_dataset(self): self.assertEqual(self.distribution.dataset, self.ds) + + # Execute pipeline + self.assertEqual( + self.distribution.pipeline(self.distribution.dataset), + self.distribution, + ) + + +class DatashaderTestCase(DatasetPropertyTestCase): + + def test_rasterize_curve(self): + img = rasterize( + self.ds.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ) + img2 = rasterize( + self.ds2.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ) + self.assertNotEqual(img, img2) + + # Check dataset + self.assertEqual(img.dataset, self.ds) + + # Check pipeline + ops = img.pipeline.operations + self.assertEqual(len(ops), 3) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertIsInstance(ops[2], rasterize) + + # Execute pipeline + self.assertEqual(img.pipeline(img.dataset), img) + self.assertEqual(img.pipeline(self.ds2), img2) + + def test_datashade_curve(self): + rgb = dynspread(datashade( + self.ds.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ), dynamic=False) + rgb2 = dynspread(datashade( + self.ds2.to(Curve, 'a', 'b', groupby=[]), dynamic=False + ), dynamic=False) + self.assertNotEqual(rgb, rgb2) + + # Check dataset + self.assertEqual(rgb.dataset, self.ds) + + # Check pipeline + ops = rgb.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertIsInstance(ops[2], datashade) + self.assertIsInstance(ops[3], dynspread) + + # Execute pipeline + self.assertEqual(rgb.pipeline(rgb.dataset), rgb) + self.assertEqual(rgb.pipeline(self.ds2), rgb2) + + +class AccessorTestCase(DatasetPropertyTestCase): + def test_apply_curve(self): + curve = self.ds.to.curve('a', 'b', groupby=[]).apply( + lambda c: Scatter(c.select(b=(20, None)).data) + ) + curve2 = self.ds2.to.curve('a', 'b', groupby=[]).apply( + lambda c: Scatter(c.select(b=(20, None)).data) + ) + self.assertNotEqual(curve, curve2) + + # Check pipeline + ops = curve.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertIs(ops[2].output_type, Apply) + 
self.assertEqual(ops[2].kwargs, {'mode': None}) + self.assertEqual(ops[3].method_name, '__call__') + + # Execute pipeline + self.assertEqual(curve.pipeline(curve.dataset), curve) + self.assertEqual( + curve.pipeline(self.ds2), curve2 + ) + + def test_redim_curve(self): + curve = self.ds.to.curve('a', 'b', groupby=[]).redim.unit( + a='kg', b='m' + ) + + curve2 = self.ds2.to.curve('a', 'b', groupby=[]).redim.unit( + a='kg', b='m' + ) + self.assertNotEqual(curve, curve2) + + # Check pipeline + ops = curve.pipeline.operations + self.assertEqual(len(ops), 4) + self.assertIs(ops[0].output_type, Dataset) + self.assertIs(ops[1].output_type, Curve) + self.assertIs(ops[2].output_type, Redim) + self.assertEqual(ops[2].kwargs, {'mode': 'dataset'}) + self.assertEqual(ops[3].method_name, '__call__') + + # Execute pipeline + self.assertEqual(curve.pipeline(curve.dataset), curve) + self.assertEqual( + curve.pipeline(self.ds2), curve2 + ) diff --git a/holoviews/tests/operation/testoperation.py b/holoviews/tests/operation/testoperation.py index caec21c96e..5459c7e39d 100644 --- a/holoviews/tests/operation/testoperation.py +++ b/holoviews/tests/operation/testoperation.py @@ -147,46 +147,6 @@ def test_points_histogram(self): vdims=('x_frequency', 'Frequency')) self.assertEqual(op_hist, hist) - def test_histogram_operation_kwargs(self): - points = Points([float(j) for i in range(10) for j in [i] * (2 * i)]) - op_hist = histogram( - points, - dimension='y', - normed=False, - num_bins=10, - bin_range=[0, 10], - ) - - hist = Histogram(( - [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] - ), vdims=('y_count', 'Count'), kdims='y') - - # Check histogram - self.assertEqual(op_hist, hist) - - # Check operation kwargs for histogram generated with operation - self.assertEqual( - op_hist._operation_kwargs, - {'dimension': 'y', - 'normed': False, - 'dynamic': False, - 'bins': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]} - ) - - # Test that operation_kwargs is preserved through clone - self.assertEqual( - op_hist.clone()._operation_kwargs, - {'dimension': 'y', - 'normed': False, - 'dynamic': False, - 'bins': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]} - ) - - # Check that operation kwargs is None for histogram generated directly - # from the Histogram constructor - self.assertIsNone(hist._operation_kwargs) - @da_skip def test_dataset_histogram_dask(self): import dask.array as da
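# ---------------------------------------------------------------------
# Editor's note: a minimal, hypothetical sketch of the histogram change
# above; it is not part of the patch. Instead of stashing
# _operation_kwargs on the element (the behavior the removed tests
# covered), the operation instance now records its computed edges via
# `self.bins = list(edges)`, so reusing that instance should bin new
# data identically. Sample data and variable names are made up.
from holoviews import Points
from holoviews.operation import histogram

points = Points([(i, float(i ** 2)) for i in range(10)])
op = histogram.instance(num_bins=5, dynamic=False)

hist1 = op(points)  # computes bin edges and stores them on op.bins
# A second call on different data defaults to the stored edges, so both
# histograms share the exact same binning.
hist2 = op(Points([(i, float(i ** 3)) for i in range(8)]))
# ---------------------------------------------------------------------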