Add an IbisInterface #4517

Merged 68 commits on Nov 20, 2020
Commits
68d1fb5
Cleaned up core/data/ module
philippjfr Jul 9, 2020
9bb5b5f
Add IbisInterface skeleton
philippjfr Jul 9, 2020
6165f9e
Add more methods the ibis interface. values, shape, array, dtype, sor…
Jul 26, 2020
5ea39e9
add groupby method
tonyfast Jul 30, 2020
6ae1c42
Add iloc and groupby method
tonyfast Jul 31, 2020
55d7949
add add dimension
tonyfast Jul 31, 2020
8261449
add selection methods
tonyfast Aug 4, 2020
db41679
add sample
tonyfast Aug 4, 2020
4791ca0
drop old doc string
tonyfast Aug 4, 2020
d59fd7b
add an aggregating method
tonyfast Aug 13, 2020
ccc2b1a
Work on getting tests passing
philippjfr Aug 13, 2020
13ccf98
Further test fixes
philippjfr Aug 14, 2020
5204c49
Simplified iloc
philippjfr Aug 14, 2020
36a3b5a
update iloc
tonyfast Aug 18, 2020
9f2f9f4
iloc work
tonyfast Aug 18, 2020
74246a3
flakes
tonyfast Aug 19, 2020
f080a8e
flake
tonyfast Aug 19, 2020
30e54b9
fix omission
tonyfast Aug 19, 2020
7efc090
test
tonyfast Aug 19, 2020
99ec2a8
add ibis to examples requirements
tonyfast Aug 19, 2020
6f6a4b6
try different dir for temporary file
tonyfast Aug 19, 2020
df56135
vscode does some questionable copy paste thing ssometimes
tonyfast Aug 19, 2020
62e8b91
install py37 in env
tonyfast Aug 19, 2020
a865733
pyenv
tonyfast Aug 19, 2020
12907a3
try using a contextmanager https://github.com/appveyor/ci/issues/2547
tonyfast Aug 21, 2020
347472b
h8 flake
tonyfast Aug 21, 2020
2f0ed92
Fix some iloc tests
tonyfast Aug 25, 2020
b08a904
resolve an error in redim
tonyfast Aug 26, 2020
80f8702
if error in iloc
tonyfast Aug 26, 2020
729920a
fixed more iloc issues
tonyfast Aug 26, 2020
f23b111
move indexing to function
tonyfast Aug 27, 2020
008e3e5
allow masking even though it is a weird case
tonyfast Aug 27, 2020
37a5e70
tune iloc
tonyfast Aug 27, 2020
4f72510
update travis
tonyfast Aug 31, 2020
571c56d
print debugging like a pro
tonyfast Aug 31, 2020
40f9d27
remove flake problem
tonyfast Aug 31, 2020
6743e0c
rm type annotations
tonyfast Aug 31, 2020
5eb2e0e
pin version on ibis, we are likely installing an old version
tonyfast Sep 1, 2020
4de48a5
add conda forge to travis channels like appveyor does
tonyfast Sep 1, 2020
cbd4db9
channels
tonyfast Sep 1, 2020
d2daf41
skips iloc problems
tonyfast Sep 2, 2020
f8da9f9
binder env
tonyfast Sep 2, 2020
bf8cde5
increase timeout
tonyfast Sep 2, 2020
67153f1
load ibis as extras
tonyfast Sep 8, 2020
e1476f0
rm annotation
tonyfast Sep 8, 2020
788fe22
Cleanup and review
philippjfr Sep 21, 2020
2660814
Implement optimized IbisInterface.dframe
philippjfr Sep 24, 2020
0367c2e
Defer HeatMap aggregation
philippjfr Sep 24, 2020
d474283
Skip range calculation for unsupported types
philippjfr Oct 4, 2020
b7ceccf
Support more aggregations
philippjfr Oct 4, 2020
7747842
Implement caching of lazy Interfaces
philippjfr Oct 4, 2020
e2170ba
Use nonzero implementation in Dataset.aggregate instead of length
philippjfr Oct 4, 2020
4ce7a84
Implement Dataset persist and compute methods
philippjfr Oct 5, 2020
0ab36ff
Fixed flake
philippjfr Oct 5, 2020
9cb5bc5
Implement histogram operation for Ibis
philippjfr Oct 5, 2020
391aedd
Fix Dataset.compute
philippjfr Oct 5, 2020
3cf3501
Fix IbisInterface.histogram
philippjfr Oct 5, 2020
4d98711
Minor histogram fix
philippjfr Oct 7, 2020
d367d03
Small selection fix
philippjfr Oct 7, 2020
f742f67
Fixed flakes
philippjfr Nov 20, 2020
95c3911
Replace typing
philippjfr Nov 20, 2020
6fde742
Fixed histogram operation for dask
philippjfr Nov 20, 2020
92503b3
Fixed Iterable import in py2
philippjfr Nov 20, 2020
2a2fc47
Fixed spreadfn insertion order
philippjfr Nov 20, 2020
cd89256
Require recent bokeh version
philippjfr Nov 20, 2020
f663fde
Drop bokeh channel
philippjfr Nov 20, 2020
c2f0ebe
Fix bokeh for py2
philippjfr Nov 20, 2020
abde038
Skip macos
philippjfr Nov 20, 2020
10 changes: 9 additions & 1 deletion .github/workflows/test.yml
@@ -20,6 +20,8 @@ jobs:
exclude:
- os: windows-latest
python-version: 2.7
- os: macos-latest
python-version: 3.7
timeout-minutes: 30
defaults:
run:
@@ -28,7 +30,7 @@
DESC: "Python ${{ matrix.python-version }} tests"
HV_REQUIREMENTS: "unit_tests"
PYTHON_VERSION: ${{ matrix.python-version }}
CHANS_DEV: "-c pyviz/label/dev -c bokeh"
CHANS_DEV: "-c pyviz/label/dev"
CHANS: "-c pyviz"
MPLBACKEND: "Agg"
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -61,6 +63,12 @@ jobs:
git describe
echo "======"
conda list
- name: bokeh update
if: startsWith(matrix.python-version, 3.)
run: |
eval "$(conda shell.bash hook)"
conda activate test-environment
conda install "bokeh>=2.2"
- name: matplotlib patch
if: startsWith(matrix.python-version, 3.)
run: |
3 changes: 3 additions & 0 deletions .gitignore
@@ -28,3 +28,6 @@ holoviews.rc
ghostdriver.log
holoviews/.version
.dir-locals.el
.doit.db
.vscode/settings.json
holoviews/.vscode/settings.json
4 changes: 2 additions & 2 deletions .travis.yml
@@ -22,7 +22,7 @@ notifications:
env:
global:
- PKG_TEST_PYTHON="--test-python=py37 --test-python=py27"
- CHANS_DEV="-c pyviz/label/dev -c bokeh"
- CHANS_DEV="-c pyviz/label/dev -c bokeh -c conda-forge"
- CHANS="-c pyviz"
- MPLBACKEND="Agg"
- PYTHON_VERSION=3.7
@@ -69,7 +69,7 @@ jobs:
install:
- doit env_create $CHANS_DEV --python=$PYTHON_VERSION
- source activate test-environment
- travis_wait 30 doit develop_install $CHANS_DEV -o $HV_REQUIREMENTS
- travis_wait 45 doit develop_install $CHANS_DEV -o $HV_REQUIREMENTS
- if [ "$PYTHON_VERSION" == "3.6" ]; then conda uninstall matplotlib matplotlib-base --force; conda install $CHANS_DEV matplotlib=3.0.3 --no-deps; fi;
- doit env_capture
- hash -r
1 change: 1 addition & 0 deletions binder/environment.yml
@@ -32,3 +32,4 @@ dependencies:
- bzip2
- dask
- scipy
- ibis-framework >= 1.3
122 changes: 57 additions & 65 deletions holoviews/core/data/__init__.py
@@ -10,6 +10,7 @@

import numpy as np
import param
import pandas as pd # noqa

from param.parameterized import add_metaclass, ParameterizedMetaclass

@@ -21,57 +22,24 @@
from ..element import Element
from ..ndmapping import OrderedDict, MultiDimensionalMapping
from ..spaces import HoloMap, DynamicMap
from .interface import Interface, iloc, ndloc
from .array import ArrayInterface
from .dictionary import DictInterface
from .grid import GridInterface

from .array import ArrayInterface # noqa (API import)
from .cudf import cuDFInterface # noqa (API import)
from .dask import DaskInterface # noqa (API import)
from .dictionary import DictInterface # noqa (API import)
from .grid import GridInterface # noqa (API import)
from .ibis import IbisInterface # noqa (API import)
from .interface import Interface, iloc, ndloc # noqa (API import)
from .multipath import MultiInterface # noqa (API import)
from .image import ImageInterface # noqa (API import)
from .pandas import PandasInterface # noqa (API import)
from .spatialpandas import SpatialPandasInterface # noqa (API import)
from .xarray import XArrayInterface # noqa (API import)

default_datatype = 'dictionary'
datatypes = ['dictionary', 'grid']

try:
import pandas as pd # noqa (Availability import)
from .pandas import PandasInterface
default_datatype = 'dataframe'
datatypes.insert(0, 'dataframe')
DFColumns = PandasInterface
except ImportError:
pd = None
except Exception as e:
pd = None
param.main.param.warning('Pandas interface failed to import with '
'following error: %s' % e)

try:
from .spatialpandas import SpatialPandasInterface # noqa (API import)
datatypes.append('spatialpandas')
except ImportError:
pass

try:
from .xarray import XArrayInterface # noqa (Conditional API import)
datatypes.append('xarray')
except ImportError:
pass
default_datatype = 'dataframe'

try:
from .cudf import cuDFInterface # noqa (Conditional API import)
datatypes.append('cuDF')
except ImportError:
pass

try:
from .dask import DaskInterface # noqa (Conditional API import)
datatypes.append('dask')
except ImportError:
pass

if 'array' not in datatypes:
datatypes.append('array')
if 'multitabular' not in datatypes:
datatypes.append('multitabular')
datatypes = ['dataframe', 'dictionary', 'grid', 'xarray', 'dask',
'cuDF', 'spatialpandas', 'array', 'multitabular', 'ibis']


def concat(datasets, datatype=None):
@@ -370,6 +338,10 @@ def __init__(self, data, kdims=None, vdims=None, **kwargs):
)
self._transforms = input_transforms or []

# On lazy interfaces this allows keeping an evaluated version
# of the dataset in memory
self._cached = None

# Handle initializing the dataset property.
self._dataset = input_dataset
if self._dataset is None and isinstance(input_data, Dataset) and not dataset_provided:
@@ -403,7 +375,6 @@ def dataset(self):
return Dataset(self, _validate_vdims=False, **self._dataset)
return self._dataset


@property
def pipeline(self):
"""
@@ -413,6 +384,34 @@ def pipeline(self):
"""
return self._pipeline

def compute(self):
"""
Computes the data to a data format that stores the data in
memory, e.g. a Dask dataframe or array is converted to a
Pandas DataFrame or NumPy array.

Returns:
Dataset with the data stored in in-memory format
"""
return self.interface.compute(self)

def persist(self):
"""
Persists the results of a lazy data interface to memory to
speed up data manipulation and visualization. If the
particular data backend already holds the data in memory
this is a no-op. Unlike the compute method this maintains
the same data type.

Returns:
Dataset with the data persisted to memory
"""
persisted = self.interface.persist(self)
if persisted.interface is self.interface:
return persisted
self._cached = persisted
return self

def closest(self, coords=[], **kwargs):
"""Snaps coordinate(s) to closest coordinate in Dataset

@@ -441,7 +440,7 @@ def closest(self, coords=[], **kwargs):
if xs.dtype.kind in 'SO':
raise NotImplementedError("Closest only supported for numeric types")
idxs = [np.argmin(np.abs(xs-coord)) for coord in coords]
return [xs[idx] for idx in idxs]
return [type(s)(xs[idx]) for s, idx in zip(coords, idxs)]


def sort(self, by=None, reverse=False):
@@ -594,15 +593,13 @@ def select(self, selection_expr=None, selection_specs=None, **selection):
# Handle selection dim expression
if selection_expr is not None:
mask = selection_expr.apply(self, compute=False, keep_index=True)
dataset = self[mask]
else:
dataset = self
selection = {'selection_mask': mask}

# Handle selection kwargs
if selection:
data = dataset.interface.select(dataset, **selection)
data = self.interface.select(self, **selection)
else:
data = dataset.data
data = self.data

if np.isscalar(data):
return data
@@ -678,7 +675,7 @@ def __getitem__(self, slices):
if not len(slices) == len(self):
raise IndexError("Boolean index must match length of sliced object")
return self.clone(self.select(selection_mask=slices))
elif slices in [(), Ellipsis]:
elif (isinstance(slices, tuple) and len(slices) == 0) or slices is Ellipsis:
return self
if not isinstance(slices, tuple): slices = (slices,)
value_select = None
@@ -770,7 +767,7 @@ def sample(self, samples=[], bounds=None, closest=True, **kwargs):
# may be replaced with more general handling
# see https://github.com/ioam/holoviews/issues/1173
from ...element import Table, Curve
datatype = ['dataframe', 'dictionary', 'dask']
datatype = ['dataframe', 'dictionary', 'dask', 'ibis']
if len(samples) == 1:
sel = {kd.name: s for kd, s in zip(self.kdims, samples[0])}
dims = [kd for kd, v in sel.items() if not np.isscalar(v)]
@@ -879,7 +876,7 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):

# Handle functions
kdims = [self.get_dimension(d, strict=True) for d in dimensions]
if not len(self):
if not self:
if spreadfn:
spread_name = spreadfn.__name__
vdims = [d for vd in self.vdims for d in [vd, vd.clone('_'.join([vd.name, spread_name]))]]
@@ -905,7 +902,9 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
for i, d in enumerate(vdims):
dim = d.clone('_'.join([d.name, spread_name]))
dvals = error.dimension_values(d, flat=False)
combined = combined.add_dimension(dim, ndims+i, dvals, True)
idx = vdims.index(d)
combined = combined.add_dimension(dim, idx+1, dvals, True)
vdims = combined.vdims
return combined.clone(new_type=Dataset if generic_type else type(self))

if np.isscalar(aggregated):
@@ -1241,10 +1240,3 @@ def ndloc(self):
dataset.ndloc[[1, 2, 3], [0, 2, 3]]
"""
return ndloc(self)


# Aliases for pickle backward compatibility
Columns = Dataset
ArrayColumns = ArrayInterface
DictColumns = DictInterface
GridColumns = GridInterface
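The `persist`/`compute` split introduced above can be sketched in isolation. This is a simplified illustration of the pattern, not the actual HoloViews classes: `compute()` always returns an in-memory dataset, while `persist()` keeps the dataset's own (lazy) type and, when a backend can only materialize to another format, stores the evaluated copy on `_cached` instead. All class names here are toy stand-ins.

```python
class LazyDataset:
    """Toy stand-in for holoviews Dataset with a pluggable interface."""

    def __init__(self, data, interface):
        self.data = data
        self.interface = interface
        self._cached = None  # evaluated copy, used by lazy backends

    def compute(self):
        # Always return a dataset holding in-memory data
        return self.interface.compute(self)

    def persist(self):
        persisted = self.interface.persist(self)
        if persisted.interface is self.interface:
            # Backend persisted in place, keeping its own data type
            return persisted
        # Backend could only materialize to another format:
        # cache the evaluated copy but keep the lazy dataset
        self._cached = persisted
        return self


class EagerInterface:
    """Toy in-memory backend (think pandas): persist is a no-op."""

    @classmethod
    def compute(cls, dataset):
        return dataset

    @classmethod
    def persist(cls, dataset):
        return dataset


class LazyListInterface:
    """Toy lazy backend with no native persist: falls back to compute."""

    @classmethod
    def compute(cls, dataset):
        # Materialize the lazy data (here: drain an iterator into a list)
        return LazyDataset(list(dataset.data), EagerInterface)

    @classmethod
    def persist(cls, dataset):
        return cls.compute(dataset)


lazy = LazyDataset(iter([1, 2, 3]), LazyListInterface)
same = lazy.persist()  # same object back, evaluated copy cached
print(same is lazy, same._cached.data)  # True [1, 2, 3]
```

With a backend like dask the real `DaskInterface.persist` returns a persisted dataset of the same type, so the `_cached` fallback is never used there.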
16 changes: 13 additions & 3 deletions holoviews/core/data/dask.py
@@ -70,6 +70,14 @@ def init(cls, eltype, data, kdims, vdims):
data = reset
return data, dims, extra

@classmethod
def compute(cls, dataset):
return dataset.clone(dataset.data.compute())

@classmethod
def persist(cls, dataset):
return dataset.clone(dataset.data.persist())

@classmethod
def shape(cls, dataset):
return (len(dataset.data), len(dataset.data.columns))
@@ -263,9 +271,11 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
data = dataset.data
if dimension.name not in data.columns:
if not np.isscalar(values):
err = ('Dask dataframe does not support assigning '
'non-scalar value.')
raise NotImplementedError(err)
if len(values):
err = ('Dask dataframe does not support assigning '
'non-scalar value.')
raise NotImplementedError(err)
values = None
data = data.assign(**{dimension.name: values})
return data

11 changes: 11 additions & 0 deletions holoviews/core/data/grid.py
@@ -409,6 +409,17 @@ def ndloc(cls, dataset, indices):
selected[d.name] = arr[tuple(adjusted_inds)]
return tuple(selected[d.name] for d in dataset.dimensions())

@classmethod
def persist(cls, dataset):
da = dask_array_module()
return {k: v.persist() if da and isinstance(v, da.Array) else v
for k, v in dataset.data.items()}

@classmethod
def compute(cls, dataset):
da = dask_array_module()
return {k: v.compute() if da and isinstance(v, da.Array) else v
for k, v in dataset.data.items()}

@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True,
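The `GridInterface.compute`/`persist` additions above share one pattern: walk the columns of a grid dict and evaluate only the values that are lazy, passing everything else through untouched. A minimal sketch, using a toy `Lazy` wrapper in place of `dask.array.Array` (names are illustrative):

```python
class Lazy:
    """Toy deferred value, standing in for a dask array."""

    def __init__(self, fn):
        self.fn = fn

    def compute(self):
        return self.fn()


def compute_columns(data):
    # Mirrors the dict comprehension in GridInterface.compute:
    # only lazy values are evaluated, eager columns pass through.
    return {k: (v.compute() if isinstance(v, Lazy) else v)
            for k, v in data.items()}


columns = {"x": [0, 1, 2], "z": Lazy(lambda: [9, 8, 7])}
materialized = compute_columns(columns)
print(materialized)  # {'x': [0, 1, 2], 'z': [9, 8, 7]}
```

This keeps the grid's dictionary format intact, which is why the real methods can hand the result straight back to the same interface.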