From fc148de8f5070c94937be07d664302e718b5673c Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sat, 12 Jan 2019 11:23:51 -0500
Subject: [PATCH 01/45] Fix for pandas 0.24.0rc1

---
 datashader/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datashader/utils.py b/datashader/utils.py
index a95c228ef..d7f6c2725 100644
--- a/datashader/utils.py
+++ b/datashader/utils.py
@@ -345,7 +345,7 @@ def dshape_from_pandas_helper(col):
         ))
         return datashape.Categorical(col.cat.categories.values,
                                      type=cat_dshape,
-                                     ordered=col.cat.categorical.ordered)
+                                     ordered=col.cat.ordered)
     elif col.dtype.kind == 'M':
         tz = getattr(col.dtype, 'tz', None)
         if tz is not None:

From 864a2355a93ed9027fb03eb3a509b362abc2f874 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sat, 12 Jan 2019 20:37:49 -0500
Subject: [PATCH 02/45] Initial RaggedArray implementation

---
 datashader/__init__.py             |   3 +
 datashader/datatypes.py            | 361 +++++++++++++++++++++++++++++
 datashader/tests/test_datatypes.py | 305 ++++++++++++++++++++++++
 3 files changed, 669 insertions(+)
 create mode 100644 datashader/datatypes.py
 create mode 100644 datashader/tests/test_datatypes.py

diff --git a/datashader/__init__.py b/datashader/__init__.py
index 017672043..747f21161 100644
--- a/datashader/__init__.py
+++ b/datashader/__init__.py
@@ -15,6 +15,9 @@
 except ImportError:
     pass
 
+# Make ragged pandas extension array available
+from . import datatypes
+
 # make pyct's example/data commands available if possible
 from functools import partial
 try:
diff --git a/datashader/datatypes.py b/datashader/datatypes.py
new file mode 100644
index 000000000..bc66c90d7
--- /dev/null
+++ b/datashader/datatypes.py
@@ -0,0 +1,361 @@
+import numpy as np
+from pandas.api.extensions import ExtensionDtype, ExtensionArray
+from pandas.core.dtypes.dtypes import register_extension_dtype
+from numbers import Integral
+
+
+@register_extension_dtype
+class RaggedDtype(ExtensionDtype):
+    name = 'ragged'
+    type = np.ndarray
+    base = np.dtype('O')
+
+    @classmethod
+    def construct_array_type(cls):
+        return RaggedArray
+
+    @classmethod
+    def construct_from_string(cls, string):
+        if string == cls.name:
+            return cls()
+        else:
+            raise TypeError("Cannot construct a '{}' from '{}'"
+                            .format(cls, string))
+
+
+class RaggedArray(ExtensionArray):
+    def __init__(self, data, dtype=None):
+        """
+        Construct a RaggedArray
+
+        Parameters
+        ----------
+        data
+            List or numpy array of lists or numpy arrays
+        dtype: np.dtype or str or None (default None)
+            Datatype to use to store underlying values from data.
+            If none (the default) then dtype will be determined using the
+            numpy.result_type function
+        """
+        if (isinstance(data, dict) and
+                all(k in data for k in
+                    ['mask', 'start_indices', 'flat_array'])):
+
+            self._mask = data['mask']
+            self._start_indices = data['start_indices']
+            self._flat_array = data['flat_array']
+        else:
+            # Compute lengths
+            index_len = len(data)
+            buffer_len = sum(len(datum)
+                             if datum is not None
+                             else 0 for datum in data)
+
+            # Compute necessary precision of start_indices array
+            for nbits in [8, 16, 32, 64]:
+                start_indices_dtype = 'uint' + str(nbits)
+                max_supported = np.iinfo(start_indices_dtype).max
+                if buffer_len <= max_supported:
+                    break
+
+            # infer dtype if not provided
+            if dtype is None:
+                dtype = np.result_type(*[np.atleast_1d(v)
+                                         for v in data
+                                         if v is not None])
+
+            # Initialize representation arrays
+            self._mask = np.zeros(index_len, dtype='bool')
+            self._start_indices = np.zeros(index_len, dtype=start_indices_dtype)
+            self._flat_array = np.zeros(buffer_len, dtype=dtype)
+
+            # Populate arrays
+            next_start_ind = 0
+            for i, array_el in enumerate(data):
+                # Check for null values
+                isnull = array_el is None
+
+                # Compute element length
+                n = len(array_el) if not isnull else 0
+
+                # Update mask
+                self._mask[i] = isnull
+
+                # Update start indices
+                self._start_indices[i] = next_start_ind
+
+                # Update flat array
+                self._flat_array[next_start_ind:next_start_ind+n] = array_el
+
+                # increment next start index
+                next_start_ind += n
+
+        # This is a workaround (hack?) to keep pandas.lib.infer_dtype from
+        # "raising cannot infer type" ValueError error when calling:
+        # >>> pd.Series([[0, 1], [1, 2, 3]], dtype='ragged')
+        self._values = self._flat_array
+
+    @property
+    def flat_array(self):
+        """
+        numpy array containing concatenation of all nested arrays
+
+        Returns
+        -------
+        np.ndarray
+        """
+        return self._flat_array
+
+    @property
+    def mask(self):
+        """
+        boolean numpy array the same length as the ragged array where values
+        of True indicate missing values.
+
+        Returns
+        -------
+        np.ndarray
+        """
+        return self._mask
+
+    @property
+    def start_indices(self):
+        """
+        integer numpy array the same length as the ragged array where values
+        represent the index into flat_array where the corresponding ragged
+        array element begins.
+
+        Returns
+        -------
+        np.ndarray
+        """
+        return self._start_indices
+
+    def __len__(self):
+        """
+        Length of this array
+
+        Returns
+        -------
+        length : int
+        """
+        return len(self._start_indices)
+
+    def __getitem__(self, item):
+        """
+        Parameters
+        ----------
+        item : int, slice, or ndarray
+            * int: The position in 'self' to get.
+
+            * slice: A slice object, where 'start', 'stop', and 'step' are
+              integers or None
+
+            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
+        """
+        if isinstance(item, Integral):
+            if item < -len(self) or item >= len(self):
+                raise IndexError(item)
+            elif self.mask[item]:
+                return None
+            else:
+                # Convert negative item index
+                if item < 0:
+                    item = 5 + item
+
+                slice_start = self.start_indices[item]
+                slice_end = (self.start_indices[item+1]
+                             if item + 1 <= len(self) - 1
+                             else len(self.flat_array))
+
+                return self.flat_array[slice_start:slice_end]
+
+        elif type(item) == slice:
+            data = []
+            selected_indices = np.arange(len(self))[item]
+
+            for selected_index in selected_indices:
+                data.append(self[selected_index])
+
+            return RaggedArray(data, dtype=self.flat_array.dtype)
+
+        elif isinstance(item, np.ndarray) and item.dtype == 'bool':
+            data = []
+
+            for i, m in enumerate(item):
+                if m:
+                    data.append(self[i])
+
+            return RaggedArray(data, dtype=self.flat_array.dtype)
+        else:
+            raise KeyError(item)
+
+    @classmethod
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        """
+        Construct a new RaggedArray from a sequence of scalars.
+
+        Parameters
+        ----------
+        scalars : Sequence
+            Each element will be an instance of the scalar type for this
+            array, ``cls.dtype.type``.
+        dtype : dtype, optional
+            Construct for this particular dtype. This should be a Dtype
+            compatible with the ExtensionArray.
+        copy : boolean, default False
+            If True, copy the underlying data.
+
+        Returns
+        -------
+        RaggedArray
+        """
+        return RaggedArray(scalars)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        """
+        Reconstruct an ExtensionArray after factorization.
+
+        Parameters
+        ----------
+        values : ndarray
+            An integer ndarray with the factorized values.
+        original : RaggedArray
+            The original ExtensionArray that factorize was called on.
+
+        See Also
+        --------
+        pandas.factorize
+        ExtensionArray.factorize
+        """
+        return RaggedArray(values, dtype=original.flat_array.dtype)
+
+    def _values_for_factorize(self):
+        # Here we return a list of the ragged elements converted into tuples.
+        # This is very inefficient, but the elements of this list must be
+        # hashable, and we must be able to reconstruct a new Ragged Array
+        # from these elements.
+        #
+        # Perhaps we could replace these tuples with a class that provides a
+        # read-only view of an ndarray slice and provides a hash function.
+        return [tuple(self[i]) if not self.mask[i] else None
+                for i in range(len(self))], None
+
+    def isna(self):
+        """
+        A 1-D array indicating if each value is missing.
+
+        Returns
+        -------
+        na_values : np.ndarray
+            boolean ndarray the same length as the ragged array where values
+            of True represent missing/NA values.
+        """
+        return self.mask
+
+    def take(self, indices, allow_fill=False, fill_value=None):
+        """
+        Take elements from an array.
+
+        Parameters
+        ----------
+        indices : sequence of integers
+            Indices to be taken.
+        allow_fill : bool, default False
+            How to handle negative values in `indices`.
+
+            * False: negative values in `indices` indicate positional indices
+              from the right (the default). This is similar to
+              :func:`numpy.take`.
+
+            * True: negative values in `indices` indicate
+              missing values. These values are set to `fill_value`. Any other
+              other negative values raise a ``ValueError``.
+
+        fill_value : any, default None
+            Fill value to use for NA-indices when `allow_fill` is True.
+
+        Returns
+        -------
+        RaggedArray
+
+        Raises
+        ------
+        IndexError
+            When the indices are out of bounds for the array.
+        """
+        if allow_fill:
+            sequence = [self[i] if i >= 0 else fill_value
+                        for i in indices]
+        else:
+            sequence = [self[i] for i in indices]
+
+        return RaggedArray(sequence, dtype=self.flat_array.dtype)
+
+    def copy(self, deep=False):
+        """
+        Return a copy of the array.
+
+        Parameters
+        ----------
+        deep : bool, default False
+            Also copy the underlying data backing this array.
+
+        Returns
+        -------
+        RaggedArray
+        """
+        data = dict(
+            mask=self.mask,
+            flat_array=self.flat_array,
+            start_indices=self.start_indices)
+
+        if deep:
+            # Copy underlying numpy arrays
+            data = {k: v.copy() for k, v in data.items()}
+
+        return RaggedArray(data)
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        """
+        Concatenate multiple RaggedArray instances
+
+        Parameters
+        ----------
+        to_concat : list of RaggedArray
+
+        Returns
+        -------
+        RaggedArray
+        """
+        # concat masks
+        mask = np.hstack(ra.mask for ra in to_concat)
+
+        # concat flat_arrays
+        flat_array = np.hstack(ra.flat_array for ra in to_concat)
+
+        # offset and concat start_indices
+        offsets = np.hstack([
+            [0],
+            np.cumsum([len(ra.flat_array) for ra in to_concat[:-1]])])
+
+        start_indices = np.hstack([ra.start_indices + offset
+                                   for offset, ra in zip(offsets, to_concat)])
+
+        return RaggedArray(dict(
+            mask=mask, flat_array=flat_array, start_indices=start_indices))
+
+    @property
+    def dtype(self):
+        return RaggedDtype
+
+    @property
+    def nbytes(self):
+        """
+        The number of bytes needed to store this object in memory.
+        """
+        return (self._flat_array.nbytes +
+                self._start_indices.nbytes +
+                self._mask.nbytes)
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
new file mode 100644
index 000000000..83b9e7c5c
--- /dev/null
+++ b/datashader/tests/test_datatypes.py
@@ -0,0 +1,305 @@
+import pytest
+import numpy as np
+import pandas as pd
+from datashader.datatypes import RaggedDtype, RaggedArray
+
+
+# Testing helpers
+# ---------------
+def assert_ragged_arrays_equal(ra1, ra2):
+    assert np.array_equal(ra1.mask, ra2.mask)
+    assert np.array_equal(ra1.start_indices, ra2.start_indices)
+    assert np.array_equal(ra1.flat_array, ra2.flat_array)
+    assert np.array_equal(ra1.flat_array.dtype, ra2.flat_array.dtype)
+
+
+# Test constructor and properties
+# -------------------------------
+def test_construct_ragged_dtype():
+    dtype = RaggedDtype()
+    assert dtype.type == np.ndarray
+    assert dtype.name == 'ragged'
+    assert dtype.kind == 'O'
+
+
+def test_construct_ragged_array():
+    rarray = RaggedArray([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]],
+                         dtype='int32')
+
+    # Check flat array
+    assert rarray.flat_array.dtype == 'int32'
+    assert np.array_equal(
+        rarray.flat_array,
+        np.array([1, 2, 10, 20, 30, 11, 22, 33, 44], dtype='int32'))
+
+    # Check start indices
+    assert rarray.start_indices.dtype == 'uint8'
+    assert np.array_equal(
+        rarray.start_indices,
+        np.array([0, 2, 2, 5, 5], dtype='uint64'))
+
+    # Check mask
+    assert rarray.mask.dtype == 'bool'
+    assert np.array_equal(
+        rarray.mask,
+        np.array([False, False, False, True, False], dtype='bool'))
+
+    # Check len
+    assert len(rarray) == 5
+
+    # Check isna
+    assert rarray.isna().dtype == 'bool'
+    assert np.array_equal(
+        rarray.isna(), [False, False, False, True, False])
+
+    # Check nbytes
+    expected = (
+            9 * np.int32().nbytes +  # flat_array
+            5 * np.uint8().nbytes +  # start_indices
+            5                        # mask
+    )
+    assert rarray.nbytes == expected
+
+    # Check dtype
+    assert rarray.dtype == RaggedDtype
+
+
+def test_start_indices_dtype():
+    # The start_indices dtype should be an unsiged int that is only as large
+    # as needed to handle the length of the flat array
+
+    # Empty
+    rarray = RaggedArray([[]], dtype='int64')
+    assert rarray.start_indices.dtype == np.dtype('uint8')
+    assert np.array_equal(rarray.start_indices, [0])
+
+    # Small
+    rarray = RaggedArray([[23, 24]], dtype='int64')
+    assert rarray.start_indices.dtype == np.dtype('uint8')
+    assert np.array_equal(rarray.start_indices, [0])
+
+    # Max uint8
+    max_uint8 = np.iinfo('uint8').max
+    rarray = RaggedArray([np.zeros(max_uint8), []], dtype='int64')
+    assert rarray.start_indices.dtype == np.dtype('uint8')
+    assert np.array_equal(rarray.start_indices, [0, max_uint8])
+
+    # Min uint16
+    rarray = RaggedArray([np.zeros(max_uint8 + 1), []], dtype='int64')
+    assert rarray.start_indices.dtype == np.dtype('uint16')
+    assert np.array_equal(rarray.start_indices, [0, max_uint8 + 1])
+
+    # Max uint16
+    max_uint16 = np.iinfo('uint16').max
+    rarray = RaggedArray([np.zeros(max_uint16), []], dtype='int64')
+    assert rarray.start_indices.dtype == np.dtype('uint16')
+    assert np.array_equal(rarray.start_indices, [0, max_uint16])
+
+    # Min uint32
+    rarray = RaggedArray([np.zeros(max_uint16 + 1), []], dtype='int64')
+    assert rarray.start_indices.dtype == np.dtype('uint32')
+    assert np.array_equal(rarray.start_indices, [0, max_uint16 + 1])
+
+
+@pytest.mark.parametrize('arg,expected', [
+    ([[1, 2]], 'int64'),
+    ([[True], [False, True]], 'bool'),
+    (np.array([np.array([1, 2], dtype='int8'),
+               np.array([1, 2], dtype='int32')]), 'int32'),
+    ([[3.2], [2]], 'float64'),
+    ([np.array([3.2], dtype='float16'),
+      np.array([2], dtype='float32')], 'float32')
+])
+def test_flat_array_type_inference(arg, expected):
+    rarray = RaggedArray(arg)
+    assert rarray.flat_array.dtype == np.dtype(expected)
+
+
+# __getitem__
+# -----------
+def test_get_item_scalar():
+    arg = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
+    rarray = RaggedArray(arg, dtype='float16')
+
+    # Forward
+    for i, expected in enumerate(arg):
+        result = rarray[i]
+        if expected is None:
+            assert result is None
+        else:
+            assert result.dtype == 'float16'
+            assert np.array_equal(result, expected)
+
+    # Reversed
+    for i, expected in enumerate(arg):
+        result = rarray[i - 5]
+        if expected is None:
+            assert result is None
+        else:
+            assert result.dtype == 'float16'
+            assert np.array_equal(result, expected)
+
+
+@pytest.mark.parametrize('index', [-1000, -6, 5, 1000])
+def test_get_item_scalar_out_of_bounds(index):
+    rarray = RaggedArray([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]])
+    with pytest.raises(IndexError) as e:
+        result = rarray[index]
+
+
+def test_get_item_slice():
+    arg = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
+    rarray = RaggedArray(arg, dtype='int16')
+
+    # Slice everything
+    assert_ragged_arrays_equal(rarray[:], rarray)
+
+    # Slice all but the first
+    assert_ragged_arrays_equal(
+        rarray[1:], RaggedArray(arg[1:], dtype='int16'))
+
+    # Slice all but the last
+    assert_ragged_arrays_equal(
+        rarray[:-1], RaggedArray(arg[:-1], dtype='int16'))
+
+    # Slice middle
+    assert_ragged_arrays_equal(
+        rarray[2:-1], RaggedArray(arg[2:-1], dtype='int16'))
+
+    # Empty slice
+    assert_ragged_arrays_equal(
+        rarray[2:1], RaggedArray(arg[2:1], dtype='int16'))
+
+
+@pytest.mark.parametrize('mask', [
+    [1, 1, 1, 1, 1],
+    [0, 1, 0, 1, 1],
+    [0, 0, 0, 0, 0]
+])
+def test_get_item_mask(mask):
+    arg = np.array([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]])
+    rarray = RaggedArray(arg, dtype='int16')
+    mask = np.array(mask, dtype='bool')
+
+    assert_ragged_arrays_equal(
+        rarray[mask],
+        RaggedArray(arg[mask], dtype='int16'))
+
+
+# _from_factorized
+# ----------------
+def test_factorization():
+    arg = np.array([[1, 2], [], [1, 2], None, [11, 22, 33, 44]])
+    rarray = RaggedArray(arg, dtype='int16')
+    labels, uniques = rarray.factorize()
+
+    assert np.array_equal(labels, [0, 1, 0, -1, 2])
+    assert_ragged_arrays_equal(
+        uniques, RaggedArray([[1, 2], [], [11, 22, 33, 44]], dtype='int16'))
+
+
+# _from_sequence
+# --------------
+def test_from_sequence():
+    sequence = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
+    rarray = RaggedArray._from_sequence(sequence)
+
+    assert_ragged_arrays_equal(
+        rarray, RaggedArray(sequence))
+
+
+# copy
+# ----
+def test_copy():
+    # Create reference ragged array
+    original = RaggedArray._from_sequence(
+        [[1, 2], [], [1, 2], None, [11, 22, 33, 44]])
+
+    # Copy reference array
+    copied = original.copy(deep=True)
+
+    # Make sure arrays are equal
+    assert_ragged_arrays_equal(original, copied)
+
+    # Modify buffer in original
+    original.flat_array[0] = 99
+    assert original.flat_array[0] == 99
+
+    # Make sure copy was not modified
+    assert copied.flat_array[0] == 1
+
+
+# take
+# ----
+def test_take():
+    #
+    rarray = RaggedArray._from_sequence(
+        [[1, 2], [], [10, 20], None, [11, 22, 33, 44]])
+
+    # allow_fill False
+    result = rarray.take([0, 2, 1, -1, -2, 0], allow_fill=False)
+    expected = RaggedArray(
+        [[1, 2], [10, 20], [], [11, 22, 33, 44], None, [1, 2]])
+    assert_ragged_arrays_equal(result, expected)
+
+    # allow fill True
+    result = rarray.take([0, 2, 1, -1, -1, 0], allow_fill=True)
+    expected = RaggedArray(
+        [[1, 2], [10, 20], [], None, None, [1, 2]])
+    assert_ragged_arrays_equal(result, expected)
+
+
+# _concat_same_type
+# -----------------
+def test_concat_same_type():
+    arg1 = [[1, 2], [], [10, 20], None, [11, 22, 33, 44]]
+    rarray1 = RaggedArray(arg1, dtype='float32')
+
+    arg2 = [[100, 200], None, [99, 100, 101]]
+    rarray2 = RaggedArray(arg2, dtype='float32')
+
+    arg3 = [None, [27, 28]]
+    rarray3 = RaggedArray(arg3, dtype='float32')
+
+    result = RaggedArray._concat_same_type([rarray1, rarray2, rarray3])
+    expected = RaggedArray(arg1 + arg2 + arg3, dtype='float32')
+
+    assert_ragged_arrays_equal(result, expected)
+
+
+# Test pandas operations
+# ----------------------
+def test_pandas_array_construction():
+    arg = [[0, 1], [1, 2, 3, 4], None, [-1, -2]] * 2
+    ra = pd.array(arg, dtype='ragged')
+
+    expected = RaggedArray(arg)
+    assert_ragged_arrays_equal(ra, expected)
+
+
+def test_series_construction():
+    arg = [[0, 1], [1, 2, 3, 4], None, [-1, -2]] * 2
+    rs = pd.Series(arg, dtype='ragged')
+    ra = rs.array
+
+    expected = RaggedArray(arg)
+    assert_ragged_arrays_equal(ra, expected)
+
+
+def test_concat_series():
+    arg1 = [[1, 2], [], [10, 20], None, [11, 22, 33, 44]]
+    s1 = pd.Series(arg1, dtype='ragged')
+
+    arg2 = [[100, 200], None, [99, 100, 101]]
+    s2 = pd.Series(arg2, dtype='ragged')
+
+    arg3 = [None, [27, 28]]
+    s3 = pd.Series(arg3, dtype='ragged')
+
+    s_concat = pd.concat([s1, s2, s3])
+
+    expected = pd.Series(arg1+arg2+arg3,
+                         dtype='ragged',
+                         index=[0, 1, 2, 3, 4, 0, 1, 2, 0, 1])
+
+    pd.testing.assert_series_equal(s_concat, expected)

From 440e207feafee08c13a1a623a1e30013f56528a7 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sun, 13 Jan 2019 18:24:45 -0500
Subject: [PATCH 03/45] Add the extension test suite provided by pandas and fix
 tests.

Something in the fixes for these tests removed the need for the ._values hack!
---
 datashader/datatypes.py            | 19 +++++----
 datashader/tests/test_datatypes.py | 62 +++++++++++++++++++++++++++++-
 2 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index bc66c90d7..3ef8171f3 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -16,8 +16,8 @@ def construct_array_type(cls):
 
     @classmethod
     def construct_from_string(cls, string):
-        if string == cls.name:
-            return cls()
+        if string == 'ragged':
+            return RaggedDtype()
         else:
             raise TypeError("Cannot construct a '{}' from '{}'"
                             .format(cls, string))
@@ -90,11 +90,6 @@ def __init__(self, data, dtype=None):
                 # increment next start index
                 next_start_ind += n
 
-        # This is a workaround (hack?) to keep pandas.lib.infer_dtype from
-        # "raising cannot infer type" ValueError error when calling:
-        # >>> pd.Series([[0, 1], [1, 2, 3]], dtype='ragged')
-        self._values = self._flat_array
-
     @property
     def flat_array(self):
         """
@@ -349,7 +344,7 @@ def _concat_same_type(cls, to_concat):
 
     @property
     def dtype(self):
-        return RaggedDtype
+        return RaggedDtype()
 
     @property
     def nbytes(self):
@@ -359,3 +354,11 @@ def nbytes(self):
         return (self._flat_array.nbytes +
                 self._start_indices.nbytes +
                 self._mask.nbytes)
+
+    def astype(self, dtype, copy=True):
+        if isinstance(dtype, RaggedDtype):
+            if copy:
+                return self.copy()
+            return self
+
+        return np.array(self, dtype=dtype, copy=copy)
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 83b9e7c5c..a9915d762 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -1,6 +1,8 @@
 import pytest
 import numpy as np
 import pandas as pd
+from pandas.tests.extension.base import BaseDtypeTests
+
 from datashader.datatypes import RaggedDtype, RaggedArray
 
 
@@ -61,7 +63,7 @@ def test_construct_ragged_array():
     assert rarray.nbytes == expected
 
     # Check dtype
-    assert rarray.dtype == RaggedDtype
+    assert type(rarray.dtype) == RaggedDtype
 
 
 def test_start_indices_dtype():
@@ -303,3 +305,61 @@ def test_concat_series():
                          index=[0, 1, 2, 3, 4, 0, 1, 2, 0, 1])
 
     pd.testing.assert_series_equal(s_concat, expected)
+
+
+# Pandas-provided extension array tests
+# -------------------------------------
+# See http://pandas-docs.github.io/pandas-docs-travis/extending.html
+@pytest.fixture
+def dtype():
+    """A fixture providing the ExtensionDtype to validate."""
+    return RaggedDtype()
+
+
+@pytest.fixture
+def data():
+    """Length-100 array for this type.
+        * data[0] and data[1] should both be non missing
+        * data[0] and data[1] should not gbe equal
+        """
+    return RaggedArray(
+        [[0, 1], [1, 2, 3, 4], [], None, [-1, -2]]*20, dtype='float64')
+
+
+@pytest.fixture
+def data_missing():
+    """Length-2 array with [NA, Valid]"""
+    return RaggedArray([None, [-1, 0, 1]], dtype='int16')
+
+
+@pytest.fixture
+def data_for_sorting():
+    """Length-3 array with a known sort order.
+    This should be three items [B, C, A] with
+    A < B < C
+    """
+    return RaggedArray([[1, 0], [2, 0], [0, 0]])
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    """Length-3 array with a known sort order.
+    This should be three items [B, NA, A] with
+    A < B and NA missing.
+    """
+    return RaggedArray([[1, 0], None, [0, 0]])
+
+
+@pytest.fixture
+def data_for_grouping():
+    """Data for factorization, grouping, and unique tests.
+    Expected to be like [B, B, NA, NA, A, A, B, C]
+    Where A < B < C and NA is missing
+    """
+    return RaggedArray(
+        [[1, 0], [1, 0], None, None, [0, 0], [0, 0], [1, 0], [2, 0]])
+
+
+# Subclass BaseDtypeTests to run pandas-provided extension array test suite
+class TestRaggedDtype(BaseDtypeTests):
+    pass

From 2f18587154ee694224b8784efdb17ee119988bdd Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sun, 13 Jan 2019 18:26:21 -0500
Subject: [PATCH 04/45] Import register_extension_dtype from pandas public
 location

---
 datashader/datatypes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 3ef8171f3..ddb47d72c 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -1,6 +1,6 @@
 import numpy as np
-from pandas.api.extensions import ExtensionDtype, ExtensionArray
-from pandas.core.dtypes.dtypes import register_extension_dtype
+from pandas.api.extensions import (
+    ExtensionDtype, ExtensionArray, register_extension_dtype)
 from numbers import Integral
 
 

From 5f46b8e85d6ddac4157ed4b047ed72a079b1aeab Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 10:28:51 -0500
Subject: [PATCH 05/45] Fix copy/paste error

---
 datashader/datatypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index ddb47d72c..2324d86e9 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -156,7 +156,7 @@ def __getitem__(self, item):
             else:
                 # Convert negative item index
                 if item < 0:
-                    item = 5 + item
+                    item = len(self) + item
 
                 slice_start = self.start_indices[item]
                 slice_end = (self.start_indices[item+1]

From a6b3c27447bb4c99d5640df094ac99dc1126aa92 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 10:29:15 -0500
Subject: [PATCH 06/45] KeyError -> IndexError

---
 datashader/datatypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 2324d86e9..ac57edd02 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -183,7 +183,7 @@ def __getitem__(self, item):
 
             return RaggedArray(data, dtype=self.flat_array.dtype)
         else:
-            raise KeyError(item)
+            raise IndexError(item)
 
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):

From fbc50659ead54e466867081839bcbd6cd4e75407 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 12:04:07 -0500
Subject: [PATCH 07/45] Document, validate, and test fast-path RaggedArray
 construction from start_indices, flat_array, and mask arrays

---
 datashader/datatypes.py            | 105 +++++++++++++++++++++++++++--
 datashader/tests/test_datatypes.py | 101 +++++++++++++++++++++++++++
 2 files changed, 199 insertions(+), 7 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index ac57edd02..743810663 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -4,6 +4,79 @@
 from numbers import Integral
 
 
+def _validate_ragged_properties(data):
+    """
+    Validate that dict contains the necessary properties to construct a
+    RaggedArray.
+
+    Parameters
+    ----------
+    data: dict
+        A dict containing 'mask', 'start_indices', and 'flat_array' keys
+        with numpy array values
+
+    Raises
+    ------
+    ValueError:
+        if input contains invalid or incompatible properties
+    """
+    # Validate mask
+    mask = data['mask']
+
+    if (not isinstance(mask, np.ndarray) or
+            mask.dtype != 'bool' or
+            mask.ndim != 1):
+        raise ValueError("""
+The mask property of a RaggedArray must be a 1D numpy array with dtype=='bool'
+    Received value of type {typ}: {v}""".format(
+            typ=type(mask), v=repr(mask)))
+
+    # Validate start_indices
+    start_indices = data['start_indices']
+
+    if (not isinstance(start_indices, np.ndarray) or
+            start_indices.dtype.kind != 'u' or
+            start_indices.ndim != 1):
+        raise ValueError("""
+The start_indices property of a RaggedArray must be a 1D numpy array of
+unsigned integers (start_indices.dtype.kind == 'u')
+    Received value of type {typ}: {v}""".format(
+            typ=type(start_indices), v=repr(start_indices)))
+
+    if len(mask) != len(start_indices):
+        raise ValueError("""
+The length of the mask and start_indices arrays must be equal
+    len(mask): {mask_len}
+    len(start_indices): {start_indices_len}""".format(
+            mask_len=len(mask), start_indices_len=len(start_indices)))
+
+    # Validate flat_array
+    flat_array = data['flat_array']
+
+    if (not isinstance(flat_array, np.ndarray) or
+            flat_array.ndim != 1):
+        raise ValueError("""
+The flat_array property of a RaggedArray must be a 1D numpy array
+    Received value of type {typ}: {v}""".format(
+            typ=type(flat_array), v=repr(flat_array)))
+
+    # Validate start_indices values
+    # We don't need to check start_indices < 0 because we already know that it
+    # has an unsigned integer datatype
+    #
+    # Note that start_indices[i] == len(flat_array) is valid as it represents
+    # and empty array element at the end of the ragged array.
+    invalid_inds = start_indices > len(flat_array)
+
+    if invalid_inds.any():
+        some_invalid_vals = start_indices[invalid_inds[:10]]
+
+        raise ValueError("""
+Elements of start_indices must be less than the length of flat_array ({m})
+    Invalid values include: {vals}""".format(
+            m=len(flat_array), vals=repr(some_invalid_vals)))
+
+
 @register_extension_dtype
 class RaggedDtype(ExtensionDtype):
     name = 'ragged'
@@ -30,17 +103,35 @@ def __init__(self, data, dtype=None):
 
         Parameters
         ----------
-        data
-            List or numpy array of lists or numpy arrays
+        data:
+            * list or 1D-array: A List or 1D array of lists or 1D arrays that
+                                should be represented by the RaggedArray
+
+            * dict: A dict containing 'mask', 'start_indices',
+                    and 'flat_array' keys with numpy array values where:
+                    - mask: boolean numpy array the same length as the
+                            ragged array where values of True indicate
+                            missing values
+                    - flat_array:  numpy array containing concatenation
+                                   of all nested arrays to be represented
+                                   by this ragged array
+                    - start_indices: unsigned integer numpy array the same
+                                     length as the ragged array where values
+                                     represent the index into flat_array where
+                                     the corresponding ragged array element
+                                     begins
+
         dtype: np.dtype or str or None (default None)
             Datatype to use to store underlying values from data.
             If none (the default) then dtype will be determined using the
-            numpy.result_type function
+            numpy.result_type function.
         """
         if (isinstance(data, dict) and
                 all(k in data for k in
                     ['mask', 'start_indices', 'flat_array'])):
 
+            _validate_ragged_properties(data)
+
             self._mask = data['mask']
             self._start_indices = data['start_indices']
             self._flat_array = data['flat_array']
@@ -105,7 +196,7 @@ def flat_array(self):
     def mask(self):
         """
         boolean numpy array the same length as the ragged array where values
-        of True indicate missing values.
+        of True indicate missing values
 
         Returns
         -------
@@ -116,9 +207,9 @@ def mask(self):
     @property
     def start_indices(self):
         """
-        integer numpy array the same length as the ragged array where values
-        represent the index into flat_array where the corresponding ragged
-        array element begins.
+        unsigned integer numpy array the same length as the ragged array where
+        values represent the index into flat_array where the corresponding
+        ragged array element begins
 
         Returns
         -------
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index a9915d762..f0ad03837 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -66,6 +66,107 @@ def test_construct_ragged_array():
     assert type(rarray.dtype) == RaggedDtype
 
 
+def test_construct_ragged_array_fastpath():
+
+    mask = np.array([False, False, False, True, False, False])
+    start_indices = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
+    flat_array = np.array(
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')
+
+    rarray = RaggedArray(
+        dict(mask=mask, start_indices=start_indices, flat_array=flat_array))
+
+    # Check that arrays were accepted unchanged
+    assert np.array_equal(rarray.mask, mask)
+    assert np.array_equal(rarray.start_indices, start_indices)
+    assert np.array_equal(rarray.flat_array, flat_array)
+
+    # Check interpretation as ragged array
+    object_array = np.asarray(rarray)
+    expected_lists = [[0, 1], [2, 3, 4], [5], None, [6, 7, 8, 9, 10], []]
+    expected_array = np.array([np.array(v, dtype='float32')
+                               if v is not None else None
+                               for v in expected_lists], dtype='object')
+
+    assert len(object_array) == len(expected_array)
+    for a1, a2 in zip(object_array, expected_array):
+        assert np.array_equal(a1, a2)
+
+
+def test_validate_ragged_array_fastpath():
+    mask = np.array([False, False, False, True, False, False])
+    start_indices = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
+    flat_array = np.array(
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')
+
+    valid_dict = dict(
+        mask=mask, start_indices=start_indices, flat_array=flat_array)
+
+    # Valid args
+    RaggedArray(valid_dict)
+
+    # ## mask validation ##
+    #
+    # not ndarray
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, mask=25))
+    ve.match('mask property of a RaggedArray')
+
+    # not boolean
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, mask=mask.astype('float32')))
+    ve.match('mask property of a RaggedArray')
+
+    # not 1d
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, mask=np.array([mask])))
+    ve.match('mask property of a RaggedArray')
+
+    # ## start_indices validation ##
+    #
+    # not ndarray
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, start_indices=25))
+    ve.match('start_indices property of a RaggedArray')
+
+    # not unsigned int
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict,
+                         start_indices=start_indices.astype('float32')))
+    ve.match('start_indices property of a RaggedArray')
+
+    # not 1d
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, start_indices=np.array([start_indices])))
+    ve.match('start_indices property of a RaggedArray')
+
+    # ## flat_array validation ##
+    #
+    # not ndarray
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, flat_array='foo'))
+    ve.match('flat_array property of a RaggedArray')
+
+    # not 1d
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, flat_array=np.array([flat_array])))
+    ve.match('flat_array property of a RaggedArray')
+
+    # ## matching length validation ##
+    #
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, start_indices=start_indices[:-1]))
+    ve.match('length of the mask and start_indices arrays must be equal')
+
+    # ## start_indices out of bounds validation ##
+    #
+    bad_start_indices = start_indices.copy()
+    bad_start_indices[-1] = 99
+    with pytest.raises(ValueError) as ve:
+        RaggedArray(dict(valid_dict, start_indices=bad_start_indices))
+    ve.match('start_indices must be less than')
+
+
 def test_start_indices_dtype():
     # The start_indices dtype should be an unsiged int that is only as large
     # as needed to handle the length of the flat array

From 527e9d64f587fe615c0eca46b70c26ba0299e03e Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 14:35:36 -0500
Subject: [PATCH 08/45] Support indexing RaggedArray with a list

---
 datashader/datatypes.py            |  2 ++
 datashader/tests/test_datatypes.py | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 743810663..fd8b87807 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -273,6 +273,8 @@ def __getitem__(self, item):
                     data.append(self[i])
 
             return RaggedArray(data, dtype=self.flat_array.dtype)
+        elif isinstance(item, (list, np.ndarray)):
+            return self.take(item, allow_fill=False)
         else:
             raise IndexError(item)
 
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index f0ad03837..c1d099a77 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -289,6 +289,22 @@ def test_get_item_mask(mask):
         RaggedArray(arg[mask], dtype='int16'))
 
 
+@pytest.mark.parametrize('inds', [
+    [1, 2, 1, 4],
+    np.array([1, 2, 1, 4]),
+    [],
+    np.array([], dtype='int32'),
+    [4, 3, 2, 1, 0]
+])
+def test_get_item_list(inds):
+    arg = np.array([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]])
+    rarray = RaggedArray(arg, dtype='int16')
+
+    assert_ragged_arrays_equal(
+        rarray[inds],
+        RaggedArray(arg[inds], dtype='int16'))
+
+
 # _from_factorized
 # ----------------
 def test_factorization():

From 8d1c34bcb194337e2ee3b9d267a289a559a77df4 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 15:08:32 -0500
Subject: [PATCH 09/45] Create single RaggedDtype() instance per RaggedArray

---
 datashader/datatypes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index fd8b87807..91f538a78 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -126,6 +126,7 @@ def __init__(self, data, dtype=None):
             If none (the default) then dtype will be determined using the
             numpy.result_type function.
         """
+        self._dtype = RaggedDtype()
         if (isinstance(data, dict) and
                 all(k in data for k in
                     ['mask', 'start_indices', 'flat_array'])):
@@ -437,7 +438,7 @@ def _concat_same_type(cls, to_concat):
 
     @property
     def dtype(self):
-        return RaggedDtype()
+        return self._dtype
 
     @property
     def nbytes(self):

From dad6cc29ea37ba4ac4a0ff69d4a6658b731b343f Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 15:13:20 -0500
Subject: [PATCH 10/45] Allow astype() to cast RaggedArray to other extension
 array types

---
 datashader/datatypes.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 91f538a78..9d253abb1 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -3,6 +3,9 @@
     ExtensionDtype, ExtensionArray, register_extension_dtype)
 from numbers import Integral
 
+from pandas.api.types import pandas_dtype
+from pandas.core.dtypes.common import is_extension_array_dtype
+
 
 def _validate_ragged_properties(data):
     """
@@ -450,9 +453,14 @@ def nbytes(self):
                 self._mask.nbytes)
 
     def astype(self, dtype, copy=True):
+
+        dtype = pandas_dtype(dtype)
         if isinstance(dtype, RaggedDtype):
             if copy:
                 return self.copy()
             return self
 
+        elif is_extension_array_dtype(dtype):
+            dtype.construct_array_type()._from_sequence(np.asarray(self))
+
         return np.array(self, dtype=dtype, copy=copy)

From fff0c3ec522c38ae1a254c5e135c4948e2d3dc3b Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 15:21:33 -0500
Subject: [PATCH 11/45] Allow RaggedArray constructor to accept a RaggedArray
 to copy

---
 datashader/datatypes.py            |  7 ++++++-
 datashader/tests/test_datatypes.py | 12 ++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 9d253abb1..2d2ad5be1 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -106,7 +106,7 @@ def __init__(self, data, dtype=None):
 
         Parameters
         ----------
-        data:
+        data: list or array or dict or RaggedArray
             * list or 1D-array: A List or 1D array of lists or 1D arrays that
                                 should be represented by the RaggedArray
 
@@ -123,6 +123,7 @@ def __init__(self, data, dtype=None):
                                      represent the index into flat_array where
                                      the corresponding ragged array element
                                      begins
+            * RaggedArray: A RaggedArray instance to copy
 
         dtype: np.dtype or str or None (default None)
             Datatype to use to store underlying values from data.
@@ -139,6 +140,10 @@ def __init__(self, data, dtype=None):
             self._mask = data['mask']
             self._start_indices = data['start_indices']
             self._flat_array = data['flat_array']
+        elif isinstance(data, RaggedArray):
+            self._mask = data.mask.copy()
+            self._flat_array = data.flat_array.copy()
+            self._start_indices = data.start_indices.copy()
         else:
             # Compute lengths
             index_len = len(data)
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index c1d099a77..ab4a36a4c 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -14,6 +14,10 @@ def assert_ragged_arrays_equal(ra1, ra2):
     assert np.array_equal(ra1.flat_array, ra2.flat_array)
     assert np.array_equal(ra1.flat_array.dtype, ra2.flat_array.dtype)
 
+    # Make sure ragged elements are equal when iterated over
+    for a1, a2 in zip(ra1, ra2):
+        assert np.array_equal(a1, a2)
+
 
 # Test constructor and properties
 # -------------------------------
@@ -66,6 +70,14 @@ def test_construct_ragged_array():
     assert type(rarray.dtype) == RaggedDtype
 
 
+def test_construct_ragged_array_from_ragged_array():
+    rarray = RaggedArray([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]],
+                         dtype='int32')
+
+    result = RaggedArray(rarray)
+    assert_ragged_arrays_equal(result, rarray)
+
+
 def test_construct_ragged_array_fastpath():
 
     mask = np.array([False, False, False, True, False, False])

From 478b65532870183abd30ccd9edadd89e31d330ea Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 18:47:52 -0500
Subject: [PATCH 12/45] Remove mask property and consider missing to be
 equivalent to empty

---
 datashader/datatypes.py            | 84 +++++++++---------------------
 datashader/tests/test_datatypes.py | 83 ++++++++++++-----------------
 2 files changed, 57 insertions(+), 110 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 2d2ad5be1..c672291e1 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -15,7 +15,7 @@ def _validate_ragged_properties(data):
     Parameters
     ----------
     data: dict
-        A dict containing 'mask', 'start_indices', and 'flat_array' keys
+        A dict containing 'start_indices', and 'flat_array' keys
         with numpy array values
 
     Raises
@@ -23,16 +23,6 @@ def _validate_ragged_properties(data):
     ValueError:
         if input contains invalid or incompatible properties
     """
-    # Validate mask
-    mask = data['mask']
-
-    if (not isinstance(mask, np.ndarray) or
-            mask.dtype != 'bool' or
-            mask.ndim != 1):
-        raise ValueError("""
-The mask property of a RaggedArray must be a 1D numpy array with dtype=='bool'
-    Received value of type {typ}: {v}""".format(
-            typ=type(mask), v=repr(mask)))
 
     # Validate start_indices
     start_indices = data['start_indices']
@@ -46,13 +36,6 @@ def _validate_ragged_properties(data):
     Received value of type {typ}: {v}""".format(
             typ=type(start_indices), v=repr(start_indices)))
 
-    if len(mask) != len(start_indices):
-        raise ValueError("""
-The length of the mask and start_indices arrays must be equal
-    len(mask): {mask_len}
-    len(start_indices): {start_indices_len}""".format(
-            mask_len=len(mask), start_indices_len=len(start_indices)))
-
     # Validate flat_array
     flat_array = data['flat_array']
 
@@ -99,6 +82,10 @@ def construct_from_string(cls, string):
                             .format(cls, string))
 
 
+def missing(v):
+    return v is None or (np.isscalar(v) and np.isnan(v))
+
+
 class RaggedArray(ExtensionArray):
     def __init__(self, data, dtype=None):
         """
@@ -110,11 +97,8 @@ def __init__(self, data, dtype=None):
             * list or 1D-array: A List or 1D array of lists or 1D arrays that
                                 should be represented by the RaggedArray
 
-            * dict: A dict containing 'mask', 'start_indices',
-                    and 'flat_array' keys with numpy array values where:
-                    - mask: boolean numpy array the same length as the
-                            ragged array where values of True indicate
-                            missing values
+            * dict: A dict containing 'start_indices' and 'flat_array' keys
+                    with numpy array values where:
                     - flat_array:  numpy array containing concatenation
                                    of all nested arrays to be represented
                                    by this ragged array
@@ -133,22 +117,20 @@ def __init__(self, data, dtype=None):
         self._dtype = RaggedDtype()
         if (isinstance(data, dict) and
                 all(k in data for k in
-                    ['mask', 'start_indices', 'flat_array'])):
+                    ['start_indices', 'flat_array'])):
 
             _validate_ragged_properties(data)
 
-            self._mask = data['mask']
             self._start_indices = data['start_indices']
             self._flat_array = data['flat_array']
         elif isinstance(data, RaggedArray):
-            self._mask = data.mask.copy()
             self._flat_array = data.flat_array.copy()
             self._start_indices = data.start_indices.copy()
         else:
             # Compute lengths
             index_len = len(data)
             buffer_len = sum(len(datum)
-                             if datum is not None
+                             if not missing(datum)
                              else 0 for datum in data)
 
             # Compute necessary precision of start_indices array
@@ -162,24 +144,17 @@ def __init__(self, data, dtype=None):
             if dtype is None:
                 dtype = np.result_type(*[np.atleast_1d(v)
                                          for v in data
-                                         if v is not None])
+                                         if not missing(v)])
 
             # Initialize representation arrays
-            self._mask = np.zeros(index_len, dtype='bool')
             self._start_indices = np.zeros(index_len, dtype=start_indices_dtype)
             self._flat_array = np.zeros(buffer_len, dtype=dtype)
 
             # Populate arrays
             next_start_ind = 0
             for i, array_el in enumerate(data):
-                # Check for null values
-                isnull = array_el is None
-
                 # Compute element length
-                n = len(array_el) if not isnull else 0
-
-                # Update mask
-                self._mask[i] = isnull
+                n = len(array_el) if not missing(array_el) else 0
 
                 # Update start indices
                 self._start_indices[i] = next_start_ind
@@ -201,18 +176,6 @@ def flat_array(self):
         """
         return self._flat_array
 
-    @property
-    def mask(self):
-        """
-        boolean numpy array the same length as the ragged array where values
-        of True indicate missing values
-
-        Returns
-        -------
-        np.ndarray
-        """
-        return self._mask
-
     @property
     def start_indices(self):
         """
@@ -251,8 +214,6 @@ def __getitem__(self, item):
         if isinstance(item, Integral):
             if item < -len(self) or item >= len(self):
                 raise IndexError(item)
-            elif self.mask[item]:
-                return None
             else:
                 # Convert negative item index
                 if item < 0:
@@ -336,8 +297,7 @@ def _values_for_factorize(self):
         #
         # Perhaps we could replace these tuples with a class that provides a
         # read-only view of an ndarray slice and provides a hash function.
-        return [tuple(self[i]) if not self.mask[i] else None
-                for i in range(len(self))], None
+        return [tuple(self[i]) for i in range(len(self))], None
 
     def isna(self):
         """
@@ -349,7 +309,11 @@ def isna(self):
             boolean ndarray the same length as the ragged array where values
             of True represent missing/NA values.
         """
-        return self.mask
+        stop_indices = np.hstack([self.start_indices[1:],
+                                  [len(self.flat_array)]])
+
+        element_lengths = stop_indices - self.start_indices
+        return element_lengths == 0
 
     def take(self, indices, allow_fill=False, fill_value=None):
         """
@@ -383,6 +347,11 @@ def take(self, indices, allow_fill=False, fill_value=None):
             When the indices are out of bounds for the array.
         """
         if allow_fill:
+            invalid_inds = [i for i in indices if i < -1]
+            if invalid_inds:
+                raise ValueError("""
+Invalid indices for take with allow_fill True: {inds}""".format(
+                    inds=invalid_inds[:9]))
             sequence = [self[i] if i >= 0 else fill_value
                         for i in indices]
         else:
@@ -404,7 +373,6 @@ def copy(self, deep=False):
         RaggedArray
         """
         data = dict(
-            mask=self.mask,
             flat_array=self.flat_array,
             start_indices=self.start_indices)
 
@@ -427,9 +395,6 @@ def _concat_same_type(cls, to_concat):
         -------
         RaggedArray
         """
-        # concat masks
-        mask = np.hstack(ra.mask for ra in to_concat)
-
         # concat flat_arrays
         flat_array = np.hstack(ra.flat_array for ra in to_concat)
 
@@ -442,7 +407,7 @@ def _concat_same_type(cls, to_concat):
                                    for offset, ra in zip(offsets, to_concat)])
 
         return RaggedArray(dict(
-            mask=mask, flat_array=flat_array, start_indices=start_indices))
+            flat_array=flat_array, start_indices=start_indices))
 
     @property
     def dtype(self):
@@ -454,8 +419,7 @@ def nbytes(self):
         The number of bytes needed to store this object in memory.
         """
         return (self._flat_array.nbytes +
-                self._start_indices.nbytes +
-                self._mask.nbytes)
+                self._start_indices.nbytes)
 
     def astype(self, dtype, copy=True):
 
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index ab4a36a4c..f5785eaab 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -9,7 +9,6 @@
 # Testing helpers
 # ---------------
 def assert_ragged_arrays_equal(ra1, ra2):
-    assert np.array_equal(ra1.mask, ra2.mask)
     assert np.array_equal(ra1.start_indices, ra2.start_indices)
     assert np.array_equal(ra1.flat_array, ra2.flat_array)
     assert np.array_equal(ra1.flat_array.dtype, ra2.flat_array.dtype)
@@ -44,25 +43,18 @@ def test_construct_ragged_array():
         rarray.start_indices,
         np.array([0, 2, 2, 5, 5], dtype='uint64'))
 
-    # Check mask
-    assert rarray.mask.dtype == 'bool'
-    assert np.array_equal(
-        rarray.mask,
-        np.array([False, False, False, True, False], dtype='bool'))
-
     # Check len
     assert len(rarray) == 5
 
     # Check isna
     assert rarray.isna().dtype == 'bool'
     assert np.array_equal(
-        rarray.isna(), [False, False, False, True, False])
+        rarray.isna(), [False, True, False, True, False])
 
     # Check nbytes
     expected = (
             9 * np.int32().nbytes +  # flat_array
-            5 * np.uint8().nbytes +  # start_indices
-            5                        # mask
+            5 * np.uint8().nbytes    # start_indices
     )
     assert rarray.nbytes == expected
 
@@ -80,24 +72,21 @@ def test_construct_ragged_array_from_ragged_array():
 
 def test_construct_ragged_array_fastpath():
 
-    mask = np.array([False, False, False, True, False, False])
     start_indices = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
     flat_array = np.array(
         [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')
 
     rarray = RaggedArray(
-        dict(mask=mask, start_indices=start_indices, flat_array=flat_array))
+        dict(start_indices=start_indices, flat_array=flat_array))
 
     # Check that arrays were accepted unchanged
-    assert np.array_equal(rarray.mask, mask)
     assert np.array_equal(rarray.start_indices, start_indices)
     assert np.array_equal(rarray.flat_array, flat_array)
 
     # Check interpretation as ragged array
     object_array = np.asarray(rarray)
-    expected_lists = [[0, 1], [2, 3, 4], [5], None, [6, 7, 8, 9, 10], []]
+    expected_lists = [[0, 1], [2, 3, 4], [5], [], [6, 7, 8, 9, 10], []]
     expected_array = np.array([np.array(v, dtype='float32')
-                               if v is not None else None
                                for v in expected_lists], dtype='object')
 
     assert len(object_array) == len(expected_array)
@@ -106,34 +95,15 @@ def test_construct_ragged_array_fastpath():
 
 
 def test_validate_ragged_array_fastpath():
-    mask = np.array([False, False, False, True, False, False])
     start_indices = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
     flat_array = np.array(
         [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')
 
-    valid_dict = dict(
-        mask=mask, start_indices=start_indices, flat_array=flat_array)
+    valid_dict = dict(start_indices=start_indices, flat_array=flat_array)
 
     # Valid args
     RaggedArray(valid_dict)
 
-    # ## mask validation ##
-    #
-    # not ndarray
-    with pytest.raises(ValueError) as ve:
-        RaggedArray(dict(valid_dict, mask=25))
-    ve.match('mask property of a RaggedArray')
-
-    # not boolean
-    with pytest.raises(ValueError) as ve:
-        RaggedArray(dict(valid_dict, mask=mask.astype('float32')))
-    ve.match('mask property of a RaggedArray')
-
-    # not 1d
-    with pytest.raises(ValueError) as ve:
-        RaggedArray(dict(valid_dict, mask=np.array([mask])))
-    ve.match('mask property of a RaggedArray')
-
     # ## start_indices validation ##
     #
     # not ndarray
@@ -164,12 +134,6 @@ def test_validate_ragged_array_fastpath():
         RaggedArray(dict(valid_dict, flat_array=np.array([flat_array])))
     ve.match('flat_array property of a RaggedArray')
 
-    # ## matching length validation ##
-    #
-    with pytest.raises(ValueError) as ve:
-        RaggedArray(dict(valid_dict, start_indices=start_indices[:-1]))
-    ve.match('length of the mask and start_indices arrays must be equal')
-
     # ## start_indices out of bounds validation ##
     #
     bad_start_indices = start_indices.copy()
@@ -230,6 +194,16 @@ def test_flat_array_type_inference(arg, expected):
     assert rarray.flat_array.dtype == np.dtype(expected)
 
 
+# isna
+# -----
+def test_isna():
+    rarray = RaggedArray([[], [1, 3], [10, 20, 30],
+                          None, [11, 22, 33, 44], []], dtype='int32')
+
+    assert np.array_equal(rarray.isna(),
+                          np.array([True, False, False, True, False, True]))
+
+
 # __getitem__
 # -----------
 def test_get_item_scalar():
@@ -240,19 +214,19 @@ def test_get_item_scalar():
     for i, expected in enumerate(arg):
         result = rarray[i]
         if expected is None:
-            assert result is None
-        else:
-            assert result.dtype == 'float16'
-            assert np.array_equal(result, expected)
+            expected = np.array([], dtype='float16')
+
+        assert result.dtype == 'float16'
+        assert np.array_equal(result, expected)
 
     # Reversed
     for i, expected in enumerate(arg):
         result = rarray[i - 5]
         if expected is None:
-            assert result is None
-        else:
-            assert result.dtype == 'float16'
-            assert np.array_equal(result, expected)
+            expected = np.array([], dtype='float16')
+
+        assert result.dtype == 'float16'
+        assert np.array_equal(result, expected)
 
 
 @pytest.mark.parametrize('index', [-1000, -6, 5, 1000])
@@ -324,7 +298,7 @@ def test_factorization():
     rarray = RaggedArray(arg, dtype='int16')
     labels, uniques = rarray.factorize()
 
-    assert np.array_equal(labels, [0, 1, 0, -1, 2])
+    assert np.array_equal(labels, [0, 1, 0, 1, 2])
     assert_ragged_arrays_equal(
         uniques, RaggedArray([[1, 2], [], [11, 22, 33, 44]], dtype='int16'))
 
@@ -461,6 +435,15 @@ def data_missing():
     return RaggedArray([None, [-1, 0, 1]], dtype='int16')
 
 
+@pytest.fixture(params=['data', 'data_missing'])
+def all_data(request, data, data_missing):
+    """Parametrized fixture giving 'data' and 'data_missing'"""
+    if request.param == 'data':
+        return data
+    elif request.param == 'data_missing':
+        return data_missing
+
+
 @pytest.fixture
 def data_for_sorting():
     """Length-3 array with a known sort order.

From 9d84b3cfbab28cb21256920baed6dca22ced0ba7 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 14 Jan 2019 19:24:25 -0500
Subject: [PATCH 13/45] More test fixes for `[]` being null

---
 datashader/datatypes.py            | 11 +++++++----
 datashader/tests/test_datatypes.py | 12 ++++++------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index c672291e1..51ef02922 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -142,9 +142,12 @@ def __init__(self, data, dtype=None):
 
             # infer dtype if not provided
             if dtype is None:
-                dtype = np.result_type(*[np.atleast_1d(v)
-                                         for v in data
-                                         if not missing(v)])
+                non_missing = [np.atleast_1d(v)
+                               for v in data if not missing(v)]
+                if non_missing:
+                    dtype = np.result_type(*non_missing)
+                else:
+                    dtype = 'float64'
 
             # Initialize representation arrays
             self._start_indices = np.zeros(index_len, dtype=start_indices_dtype)
@@ -297,7 +300,7 @@ def _values_for_factorize(self):
         #
         # Perhaps we could replace these tuples with a class that provides a
         # read-only view of an ndarray slice and provides a hash function.
-        return [tuple(self[i]) for i in range(len(self))], None
+        return [tuple(self[i]) for i in range(len(self))], ()
 
     def isna(self):
         """
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index f5785eaab..8139d9070 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -298,9 +298,9 @@ def test_factorization():
     rarray = RaggedArray(arg, dtype='int16')
     labels, uniques = rarray.factorize()
 
-    assert np.array_equal(labels, [0, 1, 0, 1, 2])
+    assert np.array_equal(labels, [0, -1, 0, -1, 1])
     assert_ragged_arrays_equal(
-        uniques, RaggedArray([[1, 2], [], [11, 22, 33, 44]], dtype='int16'))
+        uniques, RaggedArray([[1, 2], [11, 22, 33, 44]], dtype='int16'))
 
 
 # _from_sequence
@@ -426,13 +426,13 @@ def data():
         * data[0] and data[1] should not gbe equal
         """
     return RaggedArray(
-        [[0, 1], [1, 2, 3, 4], [], None, [-1, -2]]*20, dtype='float64')
+        [[0, 1], [1, 2, 3, 4], [], [-1, -2], []]*20, dtype='float64')
 
 
 @pytest.fixture
 def data_missing():
     """Length-2 array with [NA, Valid]"""
-    return RaggedArray([None, [-1, 0, 1]], dtype='int16')
+    return RaggedArray([[], [-1, 0, 1]], dtype='int16')
 
 
 @pytest.fixture(params=['data', 'data_missing'])
@@ -459,7 +459,7 @@ def data_missing_for_sorting():
     This should be three items [B, NA, A] with
     A < B and NA missing.
     """
-    return RaggedArray([[1, 0], None, [0, 0]])
+    return RaggedArray([[1, 0], [], [0, 0]])
 
 
 @pytest.fixture
@@ -469,7 +469,7 @@ def data_for_grouping():
     Where A < B < C and NA is missing
     """
     return RaggedArray(
-        [[1, 0], [1, 0], None, None, [0, 0], [0, 0], [1, 0], [2, 0]])
+        [[1, 0], [1, 0], [], [], [0, 0], [0, 0], [1, 0], [2, 0]])
 
 
 # Subclass BaseDtypeTests to run pandas-provided extension array test suite

From d71f86631a058c4c9d242eb80b6b45dbe4d66e2e Mon Sep 17 00:00:00 2001
From: "James A. Bednar" <jbednar@users.noreply.github.com>
Date: Mon, 14 Jan 2019 20:27:19 -0500
Subject: [PATCH 14/45] Update datashader/datatypes.py

Co-Authored-By: jonmmease <jon.mease@gmail.com>
---
 datashader/datatypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 51ef02922..2ad3329a4 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -15,7 +15,7 @@ def _validate_ragged_properties(data):
     Parameters
     ----------
     data: dict
-        A dict containing 'start_indices', and 'flat_array' keys
+        A dict containing 'start_indices' and 'flat_array' keys
         with numpy array values
 
     Raises

From 4cd7b4c48f9f9d18244b79b17ffd33872a2ef9b9 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Tue, 15 Jan 2019 20:26:46 -0500
Subject: [PATCH 15/45] Add RaggedElement wrapper class for internal pandas
 operations

Add additional ExtensionArray test suites
---
 datashader/datatypes.py            | 169 ++++++++++++++++++--
 datashader/tests/test_datatypes.py | 242 +++++++++++++++++++++++++++--
 2 files changed, 382 insertions(+), 29 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 2ad3329a4..0d693e716 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -1,3 +1,5 @@
+from functools import total_ordering
+
 import numpy as np
 from pandas.api.extensions import (
     ExtensionDtype, ExtensionArray, register_extension_dtype)
@@ -63,6 +65,48 @@ def _validate_ragged_properties(data):
             m=len(flat_array), vals=repr(some_invalid_vals)))
 
 
+# Internal ragged element array wrapper that provides
+# equality, ordering, and hashing.
+@total_ordering
+class RaggedElement(object):
+
+    @staticmethod
+    def ragged_or_nan(a):
+        if np.isscalar(a) and np.isnan(a):
+            return a
+        else:
+            return RaggedElement(a)
+
+    @staticmethod
+    def array_or_nan(a):
+        if np.isscalar(a) and np.isnan(a):
+            return a
+        else:
+            return a.array
+
+    def __init__(self, array):
+        self.array = array
+
+    def __hash__(self):
+        # TODO: Rewrite using self.array directly without tuple
+        return hash(tuple(self.array))
+
+    def __eq__(self, other):
+        if not isinstance(other, RaggedElement):
+            return False
+        return np.array_equal(self.array, other.array)
+
+    def __lt__(self, other):
+        # TODO: Rewrite using self.array directly without tuples
+        if not isinstance(other, RaggedElement):
+            return NotImplemented
+        return tuple(self.array) < tuple(other.array)
+
+    def __repr__(self):
+        array_repr = repr(self.array)
+        return array_repr.replace('array', 'ragged_element')
+
+
 @register_extension_dtype
 class RaggedDtype(ExtensionDtype):
     name = 'ragged'
@@ -216,7 +260,7 @@ def __getitem__(self, item):
         """
         if isinstance(item, Integral):
             if item < -len(self) or item >= len(self):
-                raise IndexError(item)
+                raise IndexError("{item} is out of bounds".format(item=item))
             else:
                 # Convert negative item index
                 if item < 0:
@@ -227,7 +271,9 @@ def __getitem__(self, item):
                              if item + 1 <= len(self) - 1
                              else len(self.flat_array))
 
-                return self.flat_array[slice_start:slice_end]
+                return (self.flat_array[slice_start:slice_end]
+                        if slice_end!=slice_start
+                        else np.nan)
 
         elif type(item) == slice:
             data = []
@@ -290,17 +336,113 @@ def _from_factorized(cls, values, original):
         pandas.factorize
         ExtensionArray.factorize
         """
-        return RaggedArray(values, dtype=original.flat_array.dtype)
+        return RaggedArray(
+            [RaggedElement.array_or_nan(v) for v in values],
+            dtype=original.flat_array.dtype)
+
+    def _as_ragged_element_array(self):
+        return np.array([RaggedElement.ragged_or_nan(self[i])
+                         for i in range(len(self))])
 
     def _values_for_factorize(self):
-        # Here we return a list of the ragged elements converted into tuples.
-        # This is very inefficient, but the elements of this list must be
-        # hashable, and we must be able to reconstruct a new Ragged Array
-        # from these elements.
-        #
-        # Perhaps we could replace these tuples with a class that provides a
-        # read-only view of an ndarray slice and provides a hash function.
-        return [tuple(self[i]) for i in range(len(self))], ()
+        return self._as_ragged_element_array(), np.nan
+
+    def _values_for_argsort(self):
+        return self._as_ragged_element_array()
+
+    def unique(self):
+        """
+        Compute the ExtensionArray of unique values.
+
+        Returns
+        -------
+        uniques : ExtensionArray
+        """
+        from pandas import unique
+
+        uniques = unique(self._as_ragged_element_array())
+        return self._from_sequence(
+            [RaggedElement.array_or_nan(v) for v in uniques],
+            dtype=self.dtype)
+
+    def shift(self, periods=1, fill_value=None):
+        # type: (int, object) -> ExtensionArray
+        """
+        Shift values by desired number.
+
+        Override in RaggedArray to handle ndarray fill values
+        """
+        # Note: this implementation assumes that `self.dtype.na_value` can be
+        # stored in an instance of your ExtensionArray with `self.dtype`.
+        if not len(self) or periods == 0:
+            return self.copy()
+
+        if fill_value is None:
+            fill_value = np.nan
+
+        empty = self._from_sequence(
+            [fill_value] * min(abs(periods), len(self)),
+            dtype=self.dtype
+        )
+        if periods > 0:
+            a = empty
+            b = self[:-periods]
+        else:
+            a = self[abs(periods):]
+            b = empty
+        return self._concat_same_type([a, b])
+
+    def searchsorted(self, value, side="left", sorter=None):
+        """
+        Find indices where elements should be inserted to maintain order.
+
+        .. versionadded:: 0.24.0
+
+        Find the indices into a sorted array `self` (a) such that, if the
+        corresponding elements in `v` were inserted before the indices, the
+        order of `self` would be preserved.
+
+        Assuming that `a` is sorted:
+
+        ======  ============================
+        `side`  returned index `i` satisfies
+        ======  ============================
+        left    ``self[i-1] < v <= self[i]``
+        right   ``self[i-1] <= v < self[i]``
+        ======  ============================
+
+        Parameters
+        ----------
+        value : array_like
+            Values to insert into `self`.
+        side : {'left', 'right'}, optional
+            If 'left', the index of the first suitable location found is given.
+            If 'right', return the last such index.  If there is no suitable
+            index, return either 0 or N (where N is the length of `self`).
+        sorter : 1-D array_like, optional
+            Optional array of integer indices that sort array a into ascending
+            order. They are typically the result of argsort.
+
+        Returns
+        -------
+        indices : array of ints
+            Array of insertion points with the same shape as `value`.
+
+        See Also
+        --------
+        numpy.searchsorted : Similar method from NumPy.
+        """
+        # Note: the base tests provided by pandas only test the basics.
+        # We do not test
+        # 1. Values outside the range of the `data_for_sorting` fixture
+        # 2. Values between the values in the `data_for_sorting` fixture
+        # 3. Missing values.
+        arr = self._as_ragged_element_array()
+        if isinstance(value, RaggedArray):
+            search_value = value._as_ragged_element_array()
+        else:
+            search_value = RaggedElement(value)
+        return arr.searchsorted(search_value, side=side, sorter=sorter)
 
     def isna(self):
         """
@@ -358,6 +500,9 @@ def take(self, indices, allow_fill=False, fill_value=None):
             sequence = [self[i] if i >= 0 else fill_value
                         for i in indices]
         else:
+            if len(self) == 0 and len(indices) > 0:
+                raise IndexError("cannot do a non-empty take")
+
             sequence = [self[i] for i in indices]
 
         return RaggedArray(sequence, dtype=self.flat_array.dtype)
@@ -435,4 +580,4 @@ def astype(self, dtype, copy=True):
         elif is_extension_array_dtype(dtype):
             dtype.construct_array_type()._from_sequence(np.asarray(self))
 
-        return np.array(self, dtype=dtype, copy=copy)
+        return np.array([v for v in self], dtype=dtype, copy=copy)
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 8139d9070..5ab4b15c8 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -1,7 +1,8 @@
 import pytest
 import numpy as np
 import pandas as pd
-from pandas.tests.extension.base import BaseDtypeTests
+import pandas.tests.extension.base as eb
+import pandas.util.testing as tm
 
 from datashader.datatypes import RaggedDtype, RaggedArray
 
@@ -15,7 +16,7 @@ def assert_ragged_arrays_equal(ra1, ra2):
 
     # Make sure ragged elements are equal when iterated over
     for a1, a2 in zip(ra1, ra2):
-        assert np.array_equal(a1, a2)
+        np.testing.assert_array_equal(a1, a2)
 
 
 # Test constructor and properties
@@ -63,7 +64,7 @@ def test_construct_ragged_array():
 
 
 def test_construct_ragged_array_from_ragged_array():
-    rarray = RaggedArray([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]],
+    rarray = RaggedArray([[1, 2], [], [10, 20, 30], np.nan, [11, 22, 33, 44]],
                          dtype='int32')
 
     result = RaggedArray(rarray)
@@ -91,7 +92,7 @@ def test_construct_ragged_array_fastpath():
 
     assert len(object_array) == len(expected_array)
     for a1, a2 in zip(object_array, expected_array):
-        assert np.array_equal(a1, a2)
+        np.testing.assert_array_equal(a1, a2)
 
 
 def test_validate_ragged_array_fastpath():
@@ -150,34 +151,34 @@ def test_start_indices_dtype():
     # Empty
     rarray = RaggedArray([[]], dtype='int64')
     assert rarray.start_indices.dtype == np.dtype('uint8')
-    assert np.array_equal(rarray.start_indices, [0])
+    np.testing.assert_array_equal(rarray.start_indices, [0])
 
     # Small
     rarray = RaggedArray([[23, 24]], dtype='int64')
     assert rarray.start_indices.dtype == np.dtype('uint8')
-    assert np.array_equal(rarray.start_indices, [0])
+    np.testing.assert_array_equal(rarray.start_indices, [0])
 
     # Max uint8
     max_uint8 = np.iinfo('uint8').max
     rarray = RaggedArray([np.zeros(max_uint8), []], dtype='int64')
     assert rarray.start_indices.dtype == np.dtype('uint8')
-    assert np.array_equal(rarray.start_indices, [0, max_uint8])
+    np.testing.assert_array_equal(rarray.start_indices, [0, max_uint8])
 
     # Min uint16
     rarray = RaggedArray([np.zeros(max_uint8 + 1), []], dtype='int64')
     assert rarray.start_indices.dtype == np.dtype('uint16')
-    assert np.array_equal(rarray.start_indices, [0, max_uint8 + 1])
+    np.testing.assert_array_equal(rarray.start_indices, [0, max_uint8 + 1])
 
     # Max uint16
     max_uint16 = np.iinfo('uint16').max
     rarray = RaggedArray([np.zeros(max_uint16), []], dtype='int64')
     assert rarray.start_indices.dtype == np.dtype('uint16')
-    assert np.array_equal(rarray.start_indices, [0, max_uint16])
+    np.testing.assert_array_equal(rarray.start_indices, [0, max_uint16])
 
     # Min uint32
     rarray = RaggedArray([np.zeros(max_uint16 + 1), []], dtype='int64')
     assert rarray.start_indices.dtype == np.dtype('uint32')
-    assert np.array_equal(rarray.start_indices, [0, max_uint16 + 1])
+    np.testing.assert_array_equal(rarray.start_indices, [0, max_uint16 + 1])
 
 
 @pytest.mark.parametrize('arg,expected', [
@@ -200,7 +201,7 @@ def test_isna():
     rarray = RaggedArray([[], [1, 3], [10, 20, 30],
                           None, [11, 22, 33, 44], []], dtype='int32')
 
-    assert np.array_equal(rarray.isna(),
+    np.testing.assert_array_equal(rarray.isna(),
                           np.array([True, False, False, True, False, True]))
 
 
@@ -216,8 +217,12 @@ def test_get_item_scalar():
         if expected is None:
             expected = np.array([], dtype='float16')
 
-        assert result.dtype == 'float16'
-        assert np.array_equal(result, expected)
+        if isinstance(result, np.ndarray):
+            assert result.dtype == 'float16'
+        else:
+            assert np.isnan(result)
+
+        np.testing.assert_array_equal(result, expected)
 
     # Reversed
     for i, expected in enumerate(arg):
@@ -225,8 +230,11 @@ def test_get_item_scalar():
         if expected is None:
             expected = np.array([], dtype='float16')
 
-        assert result.dtype == 'float16'
-        assert np.array_equal(result, expected)
+        if isinstance(result, np.ndarray):
+            assert result.dtype == 'float16'
+        else:
+            assert np.isnan(result)
+        np.testing.assert_array_equal(result, expected)
 
 
 @pytest.mark.parametrize('index', [-1000, -6, 5, 1000])
@@ -298,7 +306,7 @@ def test_factorization():
     rarray = RaggedArray(arg, dtype='int16')
     labels, uniques = rarray.factorize()
 
-    assert np.array_equal(labels, [0, -1, 0, -1, 1])
+    np.testing.assert_array_equal(labels, [0, -1, 0, -1, 1])
     assert_ragged_arrays_equal(
         uniques, RaggedArray([[1, 2], [11, 22, 33, 44]], dtype='int16'))
 
@@ -429,6 +437,25 @@ def data():
         [[0, 1], [1, 2, 3, 4], [], [-1, -2], []]*20, dtype='float64')
 
 
+@pytest.fixture
+def data_repeated(data):
+    """
+    Generate many datasets.
+    Parameters
+    ----------
+    data : fixture implementing `data`
+    Returns
+    -------
+    Callable[[int], Generator]:
+        A callable that takes a `count` argument and
+        returns a generator yielding `count` datasets.
+    """
+    def gen(count):
+        for _ in range(count):
+            yield data
+    return gen
+
+
 @pytest.fixture
 def data_missing():
     """Length-2 array with [NA, Valid]"""
@@ -472,6 +499,187 @@ def data_for_grouping():
         [[1, 0], [1, 0], [], [], [0, 0], [0, 0], [1, 0], [2, 0]])
 
 
+@pytest.fixture
+def na_cmp():
+    return lambda x, y: (np.isscalar(x) and np.isnan(x) and
+                         np.isscalar(y) and np.isnan(y))
+
+
+@pytest.fixture
+def na_value():
+    return np.nan
+
+
 # Subclass BaseDtypeTests to run pandas-provided extension array test suite
-class TestRaggedDtype(BaseDtypeTests):
+class TestRaggedConstructors(eb.BaseConstructorsTests):
+    pass
+
+
+class TestRaggedDtype(eb.BaseDtypeTests):
+    pass
+
+
+class TestRaggedGetitem(eb.BaseGetitemTests):
+
+    # Override testing methods that assume extension array scalars are
+    # comparable using `==`. Replace with assert_array_equal.
+    #
+    # If pandas introduces a way to customize element equality tests
+    # these overrides should be removed.
+    def test_get(self, data):
+        # GH 20882
+        s = pd.Series(data, index=[2 * i for i in range(len(data))])
+        np.testing.assert_array_equal(s.get(4), s.iloc[2])
+
+        result = s.get([4, 6])
+        expected = s.iloc[[2, 3]]
+        self.assert_series_equal(result, expected)
+
+        result = s.get(slice(2))
+        expected = s.iloc[[0, 1]]
+        self.assert_series_equal(result, expected)
+
+        assert s.get(-1) is None
+        assert s.get(s.index.max() + 1) is None
+
+        s = pd.Series(data[:6], index=list('abcdef'))
+        np.testing.assert_array_equal(s.get('c'), s.iloc[2])
+
+        result = s.get(slice('b', 'd'))
+        expected = s.iloc[[1, 2, 3]]
+        self.assert_series_equal(result, expected)
+
+        result = s.get('Z')
+        assert result is None
+
+        np.testing.assert_array_equal(s.get(4), s.iloc[4])
+        np.testing.assert_array_equal(s.get(-1), s.iloc[-1])
+        assert s.get(len(s)) is None
+
+    def test_take_sequence(self, data):
+        result = pd.Series(data)[[0, 1, 3]]
+        np.testing.assert_array_equal(result.iloc[0], data[0])
+        np.testing.assert_array_equal(result.iloc[1], data[1])
+        np.testing.assert_array_equal(result.iloc[2], data[3])
+
+    def test_take(self, data, na_value, na_cmp):
+        result = data.take([0, -1])
+        np.testing.assert_array_equal(result.dtype, data.dtype)
+        np.testing.assert_array_equal(result[0], data[0])
+        np.testing.assert_array_equal(result[1], data[-1])
+
+        result = data.take([0, -1], allow_fill=True, fill_value=na_value)
+        np.testing.assert_array_equal(result[0], data[0])
+        assert na_cmp(result[1], na_value)
+
+        with pytest.raises(IndexError, match="out of bounds"):
+            data.take([len(data) + 1])
+
+
+class TestRaggedGroupby(eb.BaseGroupbyTests):
+    @pytest.mark.parametrize('op', [
+        lambda x: 1,
+        lambda x: [1] * len(x),
+        # # Op below causes a:
+        # # ValueError: Names should be list-like for a MultiIndex
+        # lambda x: pd.Series([1] * len(x)),
+        lambda x: x,
+    ], ids=[
+        'scalar',
+        'list',
+        # 'series',
+        'object'])
+    def test_groupby_extension_apply(self, data_for_grouping, op):
+        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
+                           "B": data_for_grouping})
+        df.groupby("B").apply(op)
+        df.groupby("B").A.apply(op)
+        df.groupby("A").apply(op)
+        df.groupby("A").B.apply(op)
+
+
+class TestRaggedInterface(eb.BaseInterfaceTests):
+    # Add array equality
+    def test_array_interface(self, data):
+        result = np.array(data)
+        np.testing.assert_array_equal(result[0], data[0])
+
+        result = np.array(data, dtype=object)
+        expected = np.array(list(data), dtype=object)
+
+        for a1, a2 in zip(result, expected):
+            if np.isscalar(a1):
+                assert np.isnan(a1) and np.isnan(a2)
+            else:
+                tm.assert_numpy_array_equal(a2, a1)
+
+
+class TestRaggedMethods(eb.BaseMethodsTests):
+
+    # AttributeError: 'RaggedArray' object has no attribute 'value_counts'
+    @pytest.mark.skip(reason="value_counts not supported")
+    def test_value_counts(self, all_data, dropna):
+        pass
+
+    # Add array equality
+    @pytest.mark.parametrize('box', [pd.Series, lambda x: x])
+    @pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
+    def test_unique(self, data, box, method):
+        duplicated = box(data._from_sequence([data[0], data[0]]))
+
+        result = method(duplicated)
+
+        assert len(result) == 1
+        assert isinstance(result, type(data))
+        np.testing.assert_array_equal(result[0], duplicated[0])
+
+    # Pandas raises
+    #   ValueError: invalid fill value with a <class 'numpy.ndarray'>
+    @pytest.mark.skip(reason="pandas cannot fill with ndarray")
+    def test_fillna_copy_frame(self, data_missing):
+        pass
+
+    @pytest.mark.skip(reason="pandas cannot fill with ndarray")
+    def test_fillna_copy_series(self, data_missing):
+        pass
+
+    # Ragged array elements don't support binary operators
+    @pytest.mark.skip(reason="ragged does not support <= on elements")
+    def test_combine_le(self, data_repeated):
+        pass
+
+    @pytest.mark.skip(reason="ragged does not support + on elements")
+    def test_combine_add(self, data_repeated):
+        pass
+
+    # Block manager error:
+    #   ValueError: setting an array element with a sequence.
+    @pytest.mark.skip(reason="combine_first not supported")
+    def test_combine_first(self, data):
+        pass
+
+
+class TestRaggedPrinting(eb.BasePrintingTests):
+    pass
+
+
+class TestRaggedMissing(eb.BaseMissingTests):
+
+    # Pandas doesn't like using an ndarray as fill value.
+    # Errors like:
+    #   ValueError: Length of 'value' does not match. Got (3)  expected 2
+    @pytest.mark.skip(reason="Can't fill with ndarray")
+    def test_fillna_scalar(self, data_missing):
+        pass
+
+    @pytest.mark.skip(reason="Can't fill with ndarray")
+    def test_fillna_series(self, data_missing):
+        pass
+
+    @pytest.mark.skip(reason="Can't fill with ndarray")
+    def test_fillna_frame(self, data_missing):
+        pass
+
+
+class TestRaggedReshaping(eb.BaseReshapingTests):
     pass

From 16aff67b93d297bf31780449f5e6b40f7fec00d8 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 07:51:32 -0500
Subject: [PATCH 16/45] Override fillna in RaggedArray and enable test

---
 datashader/datatypes.py            | 91 +++++++++++++++++++++++++++++-
 datashader/tests/test_datatypes.py |  7 +--
 2 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 0d693e716..9d5f346f5 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -365,13 +365,102 @@ def unique(self):
             [RaggedElement.array_or_nan(v) for v in uniques],
             dtype=self.dtype)
 
+    def fillna(self, value=None, method=None, limit=None):
+        """
+        Fill NA/NaN values using the specified method.
+
+        Parameters
+        ----------
+        value : scalar, array-like
+            If a scalar value is passed it is used to fill all missing values.
+            Alternatively, an array-like 'value' can be given. It's expected
+            that the array-like have the same length as 'self'.
+        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
+            Method to use for filling holes in reindexed Series
+            pad / ffill: propagate last valid observation forward to next valid
+            backfill / bfill: use NEXT valid observation to fill gap
+        limit : int, default None
+            If method is specified, this is the maximum number of consecutive
+            NaN values to forward/backward fill. In other words, if there is
+            a gap with more than this number of consecutive NaNs, it will only
+            be partially filled. If method is not specified, this is the
+            maximum number of entries along the entire axis where NaNs will be
+            filled.
+
+        Returns
+        -------
+        filled : ExtensionArray with NA/NaN filled
+        """
+        # Override in RaggedArray to handle ndarray fill values
+        from pandas.api.types import is_array_like
+        from pandas.util._validators import validate_fillna_kwargs
+        from pandas.core.missing import pad_1d, backfill_1d
+
+        value, method = validate_fillna_kwargs(value, method)
+
+        mask = self.isna()
+
+        if isinstance(value, RaggedArray):
+            if len(value) != len(self):
+                raise ValueError("Length of 'value' does not match. Got ({}) "
+                                 " expected {}".format(len(value), len(self)))
+            value = value[mask]
+
+        if mask.any():
+            if method is not None:
+                func = pad_1d if method == 'pad' else backfill_1d
+                new_values = func(self.astype(object), limit=limit,
+                                  mask=mask)
+                new_values = self._from_sequence(new_values, dtype=self.dtype)
+            else:
+                # fill with value
+                new_values = list(self)
+                mask_indices, = np.where(mask)
+                for ind in mask_indices:
+                    new_values[ind] = value
+
+                new_values = self._from_sequence(new_values, dtype=self.dtype)
+        else:
+            new_values = self.copy()
+        return new_values
+
     def shift(self, periods=1, fill_value=None):
         # type: (int, object) -> ExtensionArray
         """
         Shift values by desired number.
 
-        Override in RaggedArray to handle ndarray fill values
+        Newly introduced missing values are filled with
+        ``self.dtype.na_value``.
+
+        .. versionadded:: 0.24.0
+
+        Parameters
+        ----------
+        periods : int, default 1
+            The number of periods to shift. Negative values are allowed
+            for shifting backwards.
+
+        fill_value : object, optional
+            The scalar value to use for newly introduced missing values.
+            The default is ``self.dtype.na_value``
+
+            .. versionadded:: 0.24.0
+
+        Returns
+        -------
+        shifted : ExtensionArray
+
+        Notes
+        -----
+        If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
+        returned.
+
+        If ``periods > len(self)``, then an array of size
+        len(self) is returned, with all values filled with
+        ``self.dtype.na_value``.
         """
+        # Override in RaggedArray to handle ndarray fill values
+
         # Note: this implementation assumes that `self.dtype.na_value` can be
         # stored in an instance of your ExtensionArray with `self.dtype`.
         if not len(self) or periods == 0:
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 5ab4b15c8..20533c377 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -664,14 +664,9 @@ class TestRaggedPrinting(eb.BasePrintingTests):
 
 
 class TestRaggedMissing(eb.BaseMissingTests):
-
     # Pandas doesn't like using an ndarray as fill value.
     # Errors like:
-    #   ValueError: Length of 'value' does not match. Got (3)  expected 2
-    @pytest.mark.skip(reason="Can't fill with ndarray")
-    def test_fillna_scalar(self, data_missing):
-        pass
-
+    #   ValueError: invalid fill value with a <class 'numpy.ndarray'>
     @pytest.mark.skip(reason="Can't fill with ndarray")
     def test_fillna_series(self, data_missing):
         pass

From 5772ade2136906697d0a76ba1a2735be68729b2a Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 07:52:35 -0500
Subject: [PATCH 17/45] Add vectorized equality operators

---
 datashader/datatypes.py            | 183 +++++++++++++++++++++++++++++
 datashader/tests/test_datatypes.py |  95 +++++++++++++++
 2 files changed, 278 insertions(+)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 9d5f346f5..a7abf863a 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -212,6 +212,50 @@ def __init__(self, data, dtype=None):
                 # increment next start index
                 next_start_ind += n
 
+    def __eq__(self, other):
+        if isinstance(other, RaggedArray):
+            if len(other) != len(self):
+                raise ValueError("""
+Cannot check equality of RaggedArray values of unequal length
+    len(ra1) == {len_ra1}
+    len(ra2) == {len_ra2}""".format(
+                    len_ra1=len(self),
+                    len_ra2=len(other)))
+
+            result = _eq_ragged_ragged(self, other)
+        else:
+            # Convert other to numpy arrauy
+            if not isinstance(other, np.ndarray):
+                other_array = np.asarray(other)
+            else:
+                other_array = other
+
+            if other_array.ndim == 1 and other_array.dtype.kind != 'O':
+
+                # Treat as ragged scalar
+                result = _eq_ragged_scalar(self, other_array)
+            elif (other_array.ndim == 1 and
+                  other_array.dtype.kind == 'O' and
+                  len(other_array) == len(self)):
+
+                # Treat as vector
+                result = _eq_ragged_ndarray1d(self, other_array)
+            elif (other_array.ndim == 2 and
+                  other_array.dtype.kind != 'O' and
+                  other_array.shape[0] == len(self)):
+
+                # Treat rows as ragged elements
+                result = _eq_ragged_ndarray2d(self, other_array)
+            else:
+                raise ValueError("""
+Cannot check equality of RaggedArray of length {ra_len} with:
+    {other}""".format(ra_len=len(self), other=repr(other)))
+
+        return result
+
+    def __ne__(self, other):
+        return np.logical_not(self == other)
+
     @property
     def flat_array(self):
         """
@@ -670,3 +714,142 @@ def astype(self, dtype, copy=True):
             dtype.construct_array_type()._from_sequence(np.asarray(self))
 
         return np.array([v for v in self], dtype=dtype, copy=copy)
+
+
+def _eq_ragged_ragged(ra1, ra2):
+    """
+    Compare elements of two ragged arrays of the same length
+
+    Parameters
+    ----------
+    ra1: RaggedArray
+    ra2: RaggedArray
+
+    Returns
+    -------
+    mask: ndarray
+        1D bool array of same length as inputs with elements True when
+        corresponding elements are equal, False otherwise
+    """
+    start_indices1 = ra1.start_indices
+    flat_array1 = ra1.flat_array
+
+    start_indices2 = ra2.start_indices
+    flat_array2 = ra2.flat_array
+
+    n = len(start_indices1)
+    m1 = len(flat_array1)
+    m2 = len(flat_array2)
+
+    result = np.zeros(n, dtype=np.bool)
+
+    for i in range(n):
+        # Extract inds for ra1
+        start_index1 = start_indices1[i]
+        stop_index1 = start_indices1[i + 1] if i < n - 1 else m1
+
+        # Extract inds for ra2
+        start_index2 = start_indices2[i]
+        stop_index2 = start_indices2[i + 1] if i < n - 1 else m2
+
+        result[i] = np.array_equal(flat_array1[start_index1:stop_index1],
+                                   flat_array2[start_index2:stop_index2])
+
+    return result
+
+
+def _eq_ragged_scalar(ra, val):
+    """
+    Compare elements of a RaggedArray with a scalar array
+
+    Parameters
+    ----------
+    ra: RaggedArray
+    val: ndarray
+
+    Returns
+    -------
+    mask: ndarray
+        1D bool array of same length as inputs with elements True when
+        ragged element equals scalar val, False otherwise.
+    """
+    start_indices = ra.start_indices
+    flat_array = ra.flat_array
+
+    n = len(start_indices)
+    m = len(flat_array)
+    result = np.zeros(n, dtype=np.bool)
+    for i in range(n):
+        start_index = start_indices[i]
+        stop_index = start_indices[i+1] if i < n - 1 else m
+        result[i] = np.array_equal(flat_array[start_index:stop_index], val)
+
+    return result
+
+
+def _eq_ragged_ndarray1d(ra, a):
+    """
+    Compare a RaggedArray with a 1D numpy object array of the same length
+
+    Parameters
+    ----------
+    ra: RaggedArray
+    a: ndarray
+        1D numpy array of same length as ra
+
+    Returns
+    -------
+    mask: ndarray
+        1D bool array of same length as input with elements True when
+        corresponding elements are equal, False otherwise
+    """
+    start_indices = ra.start_indices
+    flat_array = ra.flat_array
+
+    n = len(start_indices)
+    m = len(flat_array)
+    result = np.zeros(n, dtype=np.bool)
+    for i in range(n):
+        start_index = start_indices[i]
+        stop_index = start_indices[i + 1] if i < n - 1 else m
+        a_val = a[i]
+        if (a_val is None or
+                (np.isscalar(a_val) and np.isnan(a_val)) or
+                len(a_val) == 0):
+            result[i] = start_index == stop_index
+        else:
+            result[i] = np.array_equal(flat_array[start_index:stop_index],
+                                       a_val)
+
+    return result
+
+
+def _eq_ragged_ndarray2d(ra, a):
+    """
+    Compare a RaggedArray with rows of a 2D numpy object array
+
+    Parameters
+    ----------
+    ra: RaggedArray
+    a: ndarray
+        A 2D numpy array where the length of the first dimension matches the
+        length of the RaggedArray
+
+    Returns
+    -------
+    mask: ndarray
+        1D bool array of same length as input RaggedArray with elements True
+        when corresponding elements of ra equals corresponding row of a
+    """
+    start_indices = ra.start_indices
+    flat_array = ra.flat_array
+
+    n = len(start_indices)
+    m = len(flat_array)
+    result = np.zeros(n, dtype=np.bool)
+    for i in range(n):
+        start_index = start_indices[i]
+        stop_index = start_indices[i + 1] if i < n - 1 else m
+        result[i] = np.array_equal(flat_array[start_index:stop_index],
+                                   a[i, :])
+    return result
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 20533c377..d4d583074 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -418,6 +418,101 @@ def test_concat_series():
     pd.testing.assert_series_equal(s_concat, expected)
 
 
+# Array equality
+# --------------
+@pytest.mark.parametrize('scalar', [
+    np.array([1, 2]), [1, 2]
+])
+def test_array_eq_scalar(scalar):
+    # Build RaggedArray
+    arg1 = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
+    ra = RaggedArray(arg1, dtype='int32')
+
+    # Check equality
+    result = ra == scalar
+    expected = np.array([1, 0, 1, 0, 0], dtype='bool')
+    np.testing.assert_array_equal(result, expected)
+
+    # Check non-equality
+    result_negated = ra != scalar
+    expected_negated = ~expected
+    np.testing.assert_array_equal(result_negated, expected_negated)
+
+
+def test_array_eq_numpy1():
+    # Build RaggedArray
+    arg1 = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
+
+    # Construct arrays
+    ra = RaggedArray(arg1, dtype='int32')
+    npa = np.array(arg1, dtype='object')
+
+    # Check equality
+    result = ra == npa
+    expected = np.array([1, 1, 1, 1, 1], dtype='bool')
+    np.testing.assert_array_equal(result, expected)
+
+    # Check non-equality
+    result_negated = ra != npa
+    expected_negated = ~expected
+    np.testing.assert_array_equal(result_negated, expected_negated)
+
+
+def test_array_eq_numpy2d():
+    # Construct arrays
+    ra = RaggedArray([[1, 2], [], [1, 2], None, [11, 22, 33, 44]],
+                     dtype='int32')
+    npa = np.array([[1, 2], [2, 3], [1, 2], [0, 1], [11, 22]],
+                   dtype='int32')
+
+    # Check equality
+    result = ra == npa
+    expected = np.array([1, 0, 1, 0, 0], dtype='bool')
+    np.testing.assert_array_equal(result, expected)
+
+    # Check non-equality
+    result_negated = ra != npa
+    expected_negated = ~expected
+    np.testing.assert_array_equal(result_negated, expected_negated)
+
+
+def test_array_eq_ragged():
+    # Build RaggedArray
+    arg1 = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
+    ra1 = RaggedArray(arg1, dtype='int32')
+
+    # Build RaggedArray
+    arg2 = [[1, 2], [2, 3, 4, 5], [1, 2], None, [11]]
+    ra2 = RaggedArray(arg2, dtype='int32')
+
+    # Check equality
+    result = ra1 == ra2
+    expected = np.array([1, 0, 1, 1, 0], dtype='bool')
+    np.testing.assert_array_equal(result, expected)
+
+    # Check non-equality
+    result_negated = ra1 != ra2
+    expected_negated = ~expected
+    np.testing.assert_array_equal(result_negated, expected_negated)
+
+
+@pytest.mark.parametrize('other', [
+    'a string',  # Incompatible scalars
+    32,
+    RaggedArray([[0, 1], [2, 3, 4]]),  # RaggedArray of wrong length
+    np.array([[0, 1], [2, 3, 4]], dtype='object'),  # 1D array wrong length
+    np.array([[0, 1], [2, 3]], dtype='int32'),  # 2D array wrong row count
+])
+def test_equality_validation(other):
+    # Build RaggedArray
+    arg1 = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
+    ra1 = RaggedArray(arg1, dtype='int32')
+
+    # invalid scalar
+    with pytest.raises(ValueError, match="Cannot check equality"):
+        res = ra1 == other
+
+
 # Pandas-provided extension array tests
 # -------------------------------------
 # See http://pandas-docs.github.io/pandas-docs-travis/extending.html

From 939405b8fcbe93dec4a9a5b976a4cd3bfed805d7 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 08:01:44 -0500
Subject: [PATCH 18/45] pass start_indices and flat_array arrays as args to
 _validate_ragged_properties rather than dict key/value pairs.

---
 datashader/datatypes.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index a7abf863a..eb661823d 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -9,26 +9,28 @@
 from pandas.core.dtypes.common import is_extension_array_dtype
 
 
-def _validate_ragged_properties(data):
+def _validate_ragged_properties(start_indices, flat_array):
     """
-    Validate that dict contains the necessary properties to construct a
-    RaggedArray.
+    Validate that start_indices are flat_array arrays may be used to
+    represent a valid RaggedArray.
 
     Parameters
     ----------
-    data: dict
-        A dict containing 'start_indices' and 'flat_array' keys
-        with numpy array values
-
+    flat_array: numpy array containing concatenation
+                of all nested arrays to be represented
+                by this ragged array
+    start_indices: unsiged integer numpy array the same
+                   length as the ragged array where values
+                   represent the index into flat_array where
+                   the corresponding ragged array element
+                   begins
     Raises
     ------
     ValueError:
-        if input contains invalid or incompatible properties
+        if input arguments are invalid or incompatible properties
     """
 
     # Validate start_indices
-    start_indices = data['start_indices']
-
     if (not isinstance(start_indices, np.ndarray) or
             start_indices.dtype.kind != 'u' or
             start_indices.ndim != 1):
@@ -39,8 +41,6 @@ def _validate_ragged_properties(data):
             typ=type(start_indices), v=repr(start_indices)))
 
     # Validate flat_array
-    flat_array = data['flat_array']
-
     if (not isinstance(flat_array, np.ndarray) or
             flat_array.ndim != 1):
         raise ValueError("""
@@ -163,7 +163,9 @@ def __init__(self, data, dtype=None):
                 all(k in data for k in
                     ['start_indices', 'flat_array'])):
 
-            _validate_ragged_properties(data)
+            _validate_ragged_properties(
+                start_indices=data['start_indices'],
+                flat_array=data['flat_array'])
 
             self._start_indices = data['start_indices']
             self._flat_array = data['flat_array']

From 7f355d2c1430538a180e9e72dbb75ef7228ffb68 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 08:09:29 -0500
Subject: [PATCH 19/45] Add copy arg to RaggedArray constructor

---
 datashader/datatypes.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index eb661823d..2a538f1ae 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -131,7 +131,7 @@ def missing(v):
 
 
 class RaggedArray(ExtensionArray):
-    def __init__(self, data, dtype=None):
+    def __init__(self, data, dtype=None, copy=False):
         """
         Construct a RaggedArray
 
@@ -157,6 +157,10 @@ def __init__(self, data, dtype=None):
             Datatype to use to store underlying values from data.
             If none (the default) then dtype will be determined using the
             numpy.result_type function.
+        copy : bool (default False)
+            Whether to deep copy the input arrays. Only relevant when `data`
+            has type `dict` or `RaggedArray`. When data is a `list` or
+            `array`, input arrays are always copied.
         """
         self._dtype = RaggedDtype()
         if (isinstance(data, dict) and
@@ -169,9 +173,17 @@ def __init__(self, data, dtype=None):
 
             self._start_indices = data['start_indices']
             self._flat_array = data['flat_array']
+
+            if copy:
+                self._start_indices = self._start_indices.copy()
+                self._flat_array = self._flat_array.copy()
         elif isinstance(data, RaggedArray):
-            self._flat_array = data.flat_array.copy()
-            self._start_indices = data.start_indices.copy()
+            self._flat_array = data.flat_array
+            self._start_indices = data.start_indices
+
+            if copy:
+                self._start_indices = self._start_indices.copy()
+                self._flat_array = self._flat_array.copy()
         else:
             # Compute lengths
             index_len = len(data)
@@ -659,11 +671,7 @@ def copy(self, deep=False):
             flat_array=self.flat_array,
             start_indices=self.start_indices)
 
-        if deep:
-            # Copy underlying numpy arrays
-            data = {k: v.copy() for k, v in data.items()}
-
-        return RaggedArray(data)
+        return RaggedArray(data, copy=deep)
 
     @classmethod
     def _concat_same_type(cls, to_concat):
@@ -690,7 +698,8 @@ def _concat_same_type(cls, to_concat):
                                    for offset, ra in zip(offsets, to_concat)])
 
         return RaggedArray(dict(
-            flat_array=flat_array, start_indices=start_indices))
+            flat_array=flat_array, start_indices=start_indices),
+            copy=False)
 
     @property
     def dtype(self):

From 9e449468fe114c17bed911e41fdd54013054f3c2 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 08:10:22 -0500
Subject: [PATCH 20/45] Use += to convert negative item index in __getitem__

---
 datashader/datatypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 2a538f1ae..f8f4847e7 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -322,7 +322,7 @@ def __getitem__(self, item):
             else:
                 # Convert negative item index
                 if item < 0:
-                    item = len(self) + item
+                    item += len(self)
 
                 slice_start = self.start_indices[item]
                 slice_end = (self.start_indices[item+1]

From a52728a2a5f7e465a7d84ff3adffcb3ee7330d20 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 08:12:11 -0500
Subject: [PATCH 21/45] Fix missing return

---
 datashader/datatypes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index f8f4847e7..a1615ddba 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -722,7 +722,8 @@ def astype(self, dtype, copy=True):
             return self
 
         elif is_extension_array_dtype(dtype):
-            dtype.construct_array_type()._from_sequence(np.asarray(self))
+            return dtype.construct_array_type()._from_sequence(
+                np.asarray(self))
 
         return np.array([v for v in self], dtype=dtype, copy=copy)
 

From 75f914d10541277c93553dc67c67ff47cd3e9cac Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 09:37:26 -0500
Subject: [PATCH 22/45] Parameterize RaggedDtype by element type

This way the element dtype can be specified in the ragged datatype string

e.g.

>>> pd.Series([[1, 2], [2, 3, 4], None], dtype='Ragged[uint16]')
Out[13]:
0      [1 2]
1    [2 3 4]
2        NaN
dtype: Ragged[uint16]
---
 datashader/datatypes.py            | 80 +++++++++++++++++++++++++++---
 datashader/tests/test_datatypes.py | 20 ++++----
 2 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index a1615ddba..365fece9c 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -1,3 +1,4 @@
+import re
 from functools import total_ordering
 
 import numpy as np
@@ -112,6 +113,15 @@ class RaggedDtype(ExtensionDtype):
     name = 'ragged'
     type = np.ndarray
     base = np.dtype('O')
+    _subtype_re = re.compile(r"^ragged\[(?P<subtype>\w+)\]$")
+    _metadata = ('_dtype',)
+
+    @property
+    def name(self):
+        return 'Ragged[{subtype}]'.format(subtype=self.subtype)
+
+    def __repr__(self):
+        return self.name
 
     @classmethod
     def construct_array_type(cls):
@@ -119,11 +129,61 @@ def construct_array_type(cls):
 
     @classmethod
     def construct_from_string(cls, string):
-        if string == 'ragged':
-            return RaggedDtype()
+        # lowercase string
+        string = string.lower()
+
+        msg = "Cannot construct a 'RaggedDtype' from '{}'"
+        if string.startswith('ragged'):
+            # Extract subtype
+            try:
+                subtype_string = cls._parse_subtype(string)
+                return RaggedDtype(dtype=subtype_string)
+            except Exception:
+                raise TypeError(msg.format(string))
+        else:
+            raise TypeError(msg.format(string))
+
+    def __init__(self, dtype=np.float64):
+        if isinstance(dtype, RaggedDtype):
+            self._dtype = dtype.subtype
         else:
-            raise TypeError("Cannot construct a '{}' from '{}'"
-                            .format(cls, string))
+            self._dtype = np.dtype(dtype)
+
+    @property
+    def subtype(self):
+        return self._dtype
+
+    @classmethod
+    def _parse_subtype(cls, dtype_string):
+        """
+        Parse a datatype string to get the subtype
+
+        Parameters
+        ----------
+        dtype_string: str
+            A string like Ragged[subtype]
+
+        Returns
+        -------
+        subtype: str
+
+        Raises
+        ------
+        ValueError
+            When the subtype cannot be extracted
+        """
+        # Be case insensitive
+        dtype_string = dtype_string.lower()
+
+        match = cls._subtype_re.match(dtype_string)
+        if match:
+            subtype_string = match.groupdict()['subtype']
+        elif dtype_string == 'ragged':
+            subtype_string = 'float64'
+        else:
+            raise ValueError("Cannot parse {dtype_string}".format(
+                dtype_string=dtype_string))
+        return subtype_string
 
 
 def missing(v):
@@ -153,7 +213,7 @@ def __init__(self, data, dtype=None, copy=False):
                                      begins
             * RaggedArray: A RaggedArray instance to copy
 
-        dtype: np.dtype or str or None (default None)
+        dtype: RaggedDtype or np.dtype or str or None (default None)
             Datatype to use to store underlying values from data.
             If none (the default) then dtype will be determined using the
             numpy.result_type function.
@@ -162,7 +222,6 @@ def __init__(self, data, dtype=None, copy=False):
             has type `dict` or `RaggedArray`. When data is a `list` or
             `array`, input arrays are always copied.
         """
-        self._dtype = RaggedDtype()
         if (isinstance(data, dict) and
                 all(k in data for k in
                     ['start_indices', 'flat_array'])):
@@ -173,13 +232,16 @@ def __init__(self, data, dtype=None, copy=False):
 
             self._start_indices = data['start_indices']
             self._flat_array = data['flat_array']
+            dtype = self._flat_array.dtype
 
             if copy:
                 self._start_indices = self._start_indices.copy()
                 self._flat_array = self._flat_array.copy()
+
         elif isinstance(data, RaggedArray):
             self._flat_array = data.flat_array
             self._start_indices = data.start_indices
+            dtype = self._flat_array.dtype
 
             if copy:
                 self._start_indices = self._start_indices.copy()
@@ -206,6 +268,8 @@ def __init__(self, data, dtype=None, copy=False):
                     dtype = np.result_type(*non_missing)
                 else:
                     dtype = 'float64'
+            elif isinstance(dtype, RaggedDtype):
+                dtype = dtype.subtype
 
             # Initialize representation arrays
             self._start_indices = np.zeros(index_len, dtype=start_indices_dtype)
@@ -226,6 +290,8 @@ def __init__(self, data, dtype=None, copy=False):
                 # increment next start index
                 next_start_ind += n
 
+        self._dtype = RaggedDtype(dtype=dtype)
+
     def __eq__(self, other):
         if isinstance(other, RaggedArray):
             if len(other) != len(self):
@@ -375,7 +441,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         -------
         RaggedArray
         """
-        return RaggedArray(scalars)
+        return RaggedArray(scalars, dtype=dtype)
 
     @classmethod
     def _from_factorized(cls, values, original):
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index d4d583074..9aabb9759 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -12,7 +12,7 @@
 def assert_ragged_arrays_equal(ra1, ra2):
     assert np.array_equal(ra1.start_indices, ra2.start_indices)
     assert np.array_equal(ra1.flat_array, ra2.flat_array)
-    assert np.array_equal(ra1.flat_array.dtype, ra2.flat_array.dtype)
+    assert ra1.flat_array.dtype == ra2.flat_array.dtype
 
     # Make sure ragged elements are equal when iterated over
     for a1, a2 in zip(ra1, ra2):
@@ -24,7 +24,7 @@ def assert_ragged_arrays_equal(ra1, ra2):
 def test_construct_ragged_dtype():
     dtype = RaggedDtype()
     assert dtype.type == np.ndarray
-    assert dtype.name == 'ragged'
+    assert dtype.name == 'Ragged[{subtype}]'.format(subtype=dtype.subtype)
     assert dtype.kind == 'O'
 
 
@@ -384,35 +384,35 @@ def test_concat_same_type():
 # ----------------------
 def test_pandas_array_construction():
     arg = [[0, 1], [1, 2, 3, 4], None, [-1, -2]] * 2
-    ra = pd.array(arg, dtype='ragged')
+    ra = pd.array(arg, dtype='ragged[int64]')
 
     expected = RaggedArray(arg)
     assert_ragged_arrays_equal(ra, expected)
 
 
 def test_series_construction():
-    arg = [[0, 1], [1, 2, 3, 4], None, [-1, -2]] * 2
-    rs = pd.Series(arg, dtype='ragged')
+    arg = [[0, 1], [1.0, 2, 3.0, 4], None, [-1, -2]] * 2
+    rs = pd.Series(arg, dtype='Ragged[int64]')
     ra = rs.array
 
-    expected = RaggedArray(arg)
+    expected = RaggedArray(arg, dtype='int64')
     assert_ragged_arrays_equal(ra, expected)
 
 
 def test_concat_series():
     arg1 = [[1, 2], [], [10, 20], None, [11, 22, 33, 44]]
-    s1 = pd.Series(arg1, dtype='ragged')
+    s1 = pd.Series(arg1, dtype='ragged[int16]')
 
     arg2 = [[100, 200], None, [99, 100, 101]]
-    s2 = pd.Series(arg2, dtype='ragged')
+    s2 = pd.Series(arg2, dtype='ragged[int16]')
 
     arg3 = [None, [27, 28]]
-    s3 = pd.Series(arg3, dtype='ragged')
+    s3 = pd.Series(arg3, dtype='ragged[int16]')
 
     s_concat = pd.concat([s1, s2, s3])
 
     expected = pd.Series(arg1+arg2+arg3,
-                         dtype='ragged',
+                         dtype='ragged[int16]',
                          index=[0, 1, 2, 3, 4, 0, 1, 2, 0, 1])
 
     pd.testing.assert_series_equal(s_concat, expected)

From 32f4a3c975bbf9d57d81414c24112dd60a327552 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 18:52:45 -0500
Subject: [PATCH 23/45] Remove tuple conversions in RaggedElement

---
 datashader/datatypes.py | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 365fece9c..88f4d0738 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -89,8 +89,7 @@ def __init__(self, array):
         self.array = array
 
     def __hash__(self):
-        # TODO: Rewrite using self.array directly without tuple
-        return hash(tuple(self.array))
+        return hash(self.array.tobytes())
 
     def __eq__(self, other):
         if not isinstance(other, RaggedElement):
@@ -101,7 +100,8 @@ def __lt__(self, other):
         # TODO: Rewrite using self.array directly without tuples
         if not isinstance(other, RaggedElement):
             return NotImplemented
-        return tuple(self.array) < tuple(other.array)
+        # return tuple(self.array) < tuple(other.array)
+        return _lexograph_lt(self.array, other.array)
 
     def __repr__(self):
         array_repr = repr(self.array)
@@ -931,3 +931,26 @@ def _eq_ragged_ndarray2d(ra, a):
         result[i] = np.array_equal(flat_array[start_index:stop_index],
                                    a[i, :])
     return result
+
+
+def _lexograph_lt(a1, a2):
+    """
+    Compare two 1D numpy arrays lexographically
+    Parameters
+    ----------
+    a1: ndarray
+        1D numpy array
+    a2: ndarray
+        1D numpy array
+
+    Returns
+    -------
+    comparison:
+        True if a1 < a2, False otherwise
+    """
+    for e1, e2 in zip(a1, a2):
+        if e1 < e2:
+            return True
+        elif e1 > e2:
+            return False
+    return len(a1) < len(a2)

From 27403a7e1e6b0dcac300b75ee8f2bc1a1af6737d Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 18:53:23 -0500
Subject: [PATCH 24/45] Designate _RaggedElement as an internal class

---
 datashader/datatypes.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 88f4d0738..9f7693f2d 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -69,14 +69,14 @@ def _validate_ragged_properties(start_indices, flat_array):
 # Internal ragged element array wrapper that provides
 # equality, ordering, and hashing.
 @total_ordering
-class RaggedElement(object):
+class _RaggedElement(object):
 
     @staticmethod
     def ragged_or_nan(a):
         if np.isscalar(a) and np.isnan(a):
             return a
         else:
-            return RaggedElement(a)
+            return _RaggedElement(a)
 
     @staticmethod
     def array_or_nan(a):
@@ -92,15 +92,13 @@ def __hash__(self):
         return hash(self.array.tobytes())
 
     def __eq__(self, other):
-        if not isinstance(other, RaggedElement):
+        if not isinstance(other, _RaggedElement):
             return False
         return np.array_equal(self.array, other.array)
 
     def __lt__(self, other):
-        # TODO: Rewrite using self.array directly without tuples
-        if not isinstance(other, RaggedElement):
+        if not isinstance(other, _RaggedElement):
             return NotImplemented
-        # return tuple(self.array) < tuple(other.array)
         return _lexograph_lt(self.array, other.array)
 
     def __repr__(self):
@@ -110,7 +108,6 @@ def __repr__(self):
 
 @register_extension_dtype
 class RaggedDtype(ExtensionDtype):
-    name = 'ragged'
     type = np.ndarray
     base = np.dtype('O')
     _subtype_re = re.compile(r"^ragged\[(?P<subtype>\w+)\]$")
@@ -461,11 +458,11 @@ def _from_factorized(cls, values, original):
         ExtensionArray.factorize
         """
         return RaggedArray(
-            [RaggedElement.array_or_nan(v) for v in values],
+            [_RaggedElement.array_or_nan(v) for v in values],
             dtype=original.flat_array.dtype)
 
     def _as_ragged_element_array(self):
-        return np.array([RaggedElement.ragged_or_nan(self[i])
+        return np.array([_RaggedElement.ragged_or_nan(self[i])
                          for i in range(len(self))])
 
     def _values_for_factorize(self):
@@ -486,7 +483,7 @@ def unique(self):
 
         uniques = unique(self._as_ragged_element_array())
         return self._from_sequence(
-            [RaggedElement.array_or_nan(v) for v in uniques],
+            [_RaggedElement.array_or_nan(v) for v in uniques],
             dtype=self.dtype)
 
     def fillna(self, value=None, method=None, limit=None):
@@ -654,7 +651,7 @@ def searchsorted(self, value, side="left", sorter=None):
         if isinstance(value, RaggedArray):
             search_value = value._as_ragged_element_array()
         else:
-            search_value = RaggedElement(value)
+            search_value = _RaggedElement(value)
         return arr.searchsorted(search_value, side=side, sorter=sorter)
 
     def isna(self):

From e93c24dcd3f1651ab72c6d0e8b5e3b1c7b4566c1 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 19:43:57 -0500
Subject: [PATCH 25/45] numba jit utility functions

---
 datashader/datatypes.py            | 129 ++++++++++++++++++++---------
 datashader/tests/test_datatypes.py |  15 ++--
 2 files changed, 98 insertions(+), 46 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 9f7693f2d..c671891b0 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -2,6 +2,7 @@
 from functools import total_ordering
 
 import numpy as np
+from numba import jit
 from pandas.api.extensions import (
     ExtensionDtype, ExtensionArray, register_extension_dtype)
 from numbers import Integral
@@ -299,7 +300,9 @@ def __eq__(self, other):
                     len_ra1=len(self),
                     len_ra2=len(other)))
 
-            result = _eq_ragged_ragged(self, other)
+            result = _eq_ragged_ragged(
+                self.start_indices, self.flat_array,
+                other.start_indices, other.flat_array)
         else:
             # Convert other to numpy arrauy
             if not isinstance(other, np.ndarray):
@@ -310,19 +313,22 @@ def __eq__(self, other):
             if other_array.ndim == 1 and other_array.dtype.kind != 'O':
 
                 # Treat as ragged scalar
-                result = _eq_ragged_scalar(self, other_array)
+                result = _eq_ragged_scalar(
+                    self.start_indices, self.flat_array, other_array)
             elif (other_array.ndim == 1 and
                   other_array.dtype.kind == 'O' and
                   len(other_array) == len(self)):
 
                 # Treat as vector
-                result = _eq_ragged_ndarray1d(self, other_array)
+                result = _eq_ragged_ndarray1d(
+                    self.start_indices, self.flat_array, other_array)
             elif (other_array.ndim == 2 and
                   other_array.dtype.kind != 'O' and
                   other_array.shape[0] == len(self)):
 
                 # Treat rows as ragged elements
-                result = _eq_ragged_ndarray2d(self, other_array)
+                result = _eq_ragged_ndarray2d(
+                    self.start_indices, self.flat_array, other_array)
             else:
                 raise ValueError("""
 Cannot check equality of RaggedArray of length {ra_len} with:
@@ -791,14 +797,24 @@ def astype(self, dtype, copy=True):
         return np.array([v for v in self], dtype=dtype, copy=copy)
 
 
-def _eq_ragged_ragged(ra1, ra2):
+@jit(nopython=True, nogil=True)
+def _eq_ragged_ragged(start_indices1,
+                      flat_array1,
+                      start_indices2,
+                      flat_array2):
     """
     Compare elements of two ragged arrays of the same length
 
     Parameters
     ----------
-    ra1: RaggedArray
-    ra2: RaggedArray
+    start_indices1: ndarray
+        start indices of a RaggedArray 1
+    flat_array1: ndarray
+        flat_array property of a RaggedArray 1
+    start_indices2: ndarray
+        start indices of a RaggedArray 2
+    flat_array2: ndarray
+        flat_array property of a RaggedArray 2
 
     Returns
     -------
@@ -806,40 +822,50 @@ def _eq_ragged_ragged(ra1, ra2):
         1D bool array of same length as inputs with elements True when
         corresponding elements are equal, False otherwise
     """
-    start_indices1 = ra1.start_indices
-    flat_array1 = ra1.flat_array
-
-    start_indices2 = ra2.start_indices
-    flat_array2 = ra2.flat_array
-
     n = len(start_indices1)
     m1 = len(flat_array1)
     m2 = len(flat_array2)
 
-    result = np.zeros(n, dtype=np.bool)
+    result = np.zeros(n, dtype=np.bool_)
 
     for i in range(n):
         # Extract inds for ra1
         start_index1 = start_indices1[i]
         stop_index1 = start_indices1[i + 1] if i < n - 1 else m1
+        len_1 = stop_index1 - start_index1
 
         # Extract inds for ra2
         start_index2 = start_indices2[i]
         stop_index2 = start_indices2[i + 1] if i < n - 1 else m2
+        len_2 = stop_index2 - start_index2
+
+        if len_1 != len_2:
+            el_equal = False
+        else:
+            el_equal = True
+            for flat_index1, flat_index2 in \
+                    zip(range(start_index1, stop_index1),
+                        range(start_index2, stop_index2)):
+                el_1 = flat_array1[flat_index1]
+                el_2 = flat_array2[flat_index2]
+                el_equal &= el_1 == el_2
 
-        result[i] = np.array_equal(flat_array1[start_index1:stop_index1],
-                                   flat_array2[start_index2:stop_index2])
+        result[i] = el_equal
 
     return result
 
 
-def _eq_ragged_scalar(ra, val):
+@jit(nopython=True, nogil=True)
+def _eq_ragged_scalar(start_indices, flat_array, val):
     """
     Compare elements of a RaggedArray with a scalar array
 
     Parameters
     ----------
-    ra: RaggedArray
+    start_indices: ndarray
+        start indices of a RaggedArray
+    flat_array: ndarray
+        flat_array property of a RaggedArray
     val: ndarray
 
     Returns
@@ -848,27 +874,36 @@ def _eq_ragged_scalar(ra, val):
         1D bool array of same length as inputs with elements True when
         ragged element equals scalar val, False otherwise.
     """
-    start_indices = ra.start_indices
-    flat_array = ra.flat_array
-
     n = len(start_indices)
     m = len(flat_array)
-    result = np.zeros(n, dtype=np.bool)
+    cols = len(val)
+    result = np.zeros(n, dtype=np.bool_)
     for i in range(n):
         start_index = start_indices[i]
         stop_index = start_indices[i+1] if i < n - 1 else m
-        result[i] = np.array_equal(flat_array[start_index:stop_index], val)
+
+        if stop_index - start_index != cols:
+            el_equal = False
+        else:
+            el_equal = True
+            for val_index, flat_index in \
+                    enumerate(range(start_index, stop_index)):
+                el_equal &= flat_array[flat_index] == val[val_index]
+        result[i] = el_equal
 
     return result
 
 
-def _eq_ragged_ndarray1d(ra, a):
+def _eq_ragged_ndarray1d(start_indices, flat_array, a):
     """
     Compare a RaggedArray with a 1D numpy object array of the same length
 
     Parameters
     ----------
-    ra: RaggedArray
+    start_indices: ndarray
+        start indices of a RaggedArray
+    flat_array: ndarray
+        flat_array property of a RaggedArray
     a: ndarray
         1D numpy array of same length as ra
 
@@ -877,13 +912,16 @@ def _eq_ragged_ndarray1d(ra, a):
     mask: ndarray
         1D bool array of same length as input with elements True when
         corresponding elements are equal, False otherwise
+
+    Notes
+    -----
+    This function is not numba accelerated because it, but design, inputs
+    a numpy object array
     """
-    start_indices = ra.start_indices
-    flat_array = ra.flat_array
 
     n = len(start_indices)
     m = len(flat_array)
-    result = np.zeros(n, dtype=np.bool)
+    result = np.zeros(n, dtype=np.bool_)
     for i in range(n):
         start_index = start_indices[i]
         stop_index = start_indices[i + 1] if i < n - 1 else m
@@ -899,13 +937,17 @@ def _eq_ragged_ndarray1d(ra, a):
     return result
 
 
-def _eq_ragged_ndarray2d(ra, a):
+@jit(nopython=True, nogil=True)
+def _eq_ragged_ndarray2d(start_indices, flat_array, a):
     """
     Compare a RaggedArray with rows of a 2D numpy object array
 
     Parameters
     ----------
-    ra: RaggedArray
+    start_indices: ndarray
+        start indices of a RaggedArray
+    flat_array: ndarray
+        flat_array property of a RaggedArray
     a: ndarray
         A 2D numpy array where the length of the first dimension matches the
         length of the RaggedArray
@@ -916,20 +958,29 @@ def _eq_ragged_ndarray2d(ra, a):
         1D bool array of same length as input RaggedArray with elements True
         when corresponding elements of ra equals corresponding row of a
     """
-    start_indices = ra.start_indices
-    flat_array = ra.flat_array
-
     n = len(start_indices)
     m = len(flat_array)
-    result = np.zeros(n, dtype=np.bool)
-    for i in range(n):
-        start_index = start_indices[i]
-        stop_index = start_indices[i + 1] if i < n - 1 else m
-        result[i] = np.array_equal(flat_array[start_index:stop_index],
-                                   a[i, :])
+    cols = a.shape[1]
+
+    # np.bool is an alias for Python's built-in bool type, np.bool_ is the
+    # numpy type that numba recognizes
+    result = np.zeros(n, dtype=np.bool_)
+    for row in range(n):
+        start_index = start_indices[row]
+        stop_index = start_indices[row + 1] if row < n - 1 else m
+
+        # Check equality
+        if stop_index - start_index != cols:
+            el_equal = False
+        else:
+            el_equal = True
+            for col, flat_index in enumerate(range(start_index, stop_index)):
+                el_equal &= flat_array[flat_index] == a[row, col]
+        result[row] = el_equal
     return result
 
 
+@jit(nopython=True, nogil=True)
 def _lexograph_lt(a1, a2):
     """
     Compare two 1D numpy arrays lexographically
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 9aabb9759..90380f841 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -425,7 +425,7 @@ def test_concat_series():
 ])
 def test_array_eq_scalar(scalar):
     # Build RaggedArray
-    arg1 = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
+    arg1 = [[1, 2], [], [1, 2], [1, 3], [11, 22, 33, 44]]
     ra = RaggedArray(arg1, dtype='int32')
 
     # Check equality
@@ -445,11 +445,12 @@ def test_array_eq_numpy1():
 
     # Construct arrays
     ra = RaggedArray(arg1, dtype='int32')
-    npa = np.array(arg1, dtype='object')
+    npa = np.array([[1, 2], [2], [1, 2], None, [10, 20, 30, 40]],
+                   dtype='object')
 
     # Check equality
     result = ra == npa
-    expected = np.array([1, 1, 1, 1, 1], dtype='bool')
+    expected = np.array([1, 0, 1, 1, 0], dtype='bool')
     np.testing.assert_array_equal(result, expected)
 
     # Check non-equality
@@ -460,7 +461,7 @@ def test_array_eq_numpy1():
 
 def test_array_eq_numpy2d():
     # Construct arrays
-    ra = RaggedArray([[1, 2], [], [1, 2], None, [11, 22, 33, 44]],
+    ra = RaggedArray([[1, 2], [1], [1, 2], None, [33, 44]],
                      dtype='int32')
     npa = np.array([[1, 2], [2, 3], [1, 2], [0, 1], [11, 22]],
                    dtype='int32')
@@ -478,16 +479,16 @@ def test_array_eq_numpy2d():
 
 def test_array_eq_ragged():
     # Build RaggedArray
-    arg1 = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
+    arg1 = [[1, 2], [], [1, 2], [3, 2, 1], [11, 22, 33, 44]]
     ra1 = RaggedArray(arg1, dtype='int32')
 
     # Build RaggedArray
-    arg2 = [[1, 2], [2, 3, 4, 5], [1, 2], None, [11]]
+    arg2 = [[1, 2], [2, 3, 4, 5], [1, 2], [11, 22, 33], [11]]
     ra2 = RaggedArray(arg2, dtype='int32')
 
     # Check equality
     result = ra1 == ra2
-    expected = np.array([1, 0, 1, 1, 0], dtype='bool')
+    expected = np.array([1, 0, 1, 0, 0], dtype='bool')
     np.testing.assert_array_equal(result, expected)
 
     # Check non-equality

From 3fda78684b192e37fd1f83a1450d611bb3421e14 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 17 Jan 2019 20:08:11 -0500
Subject: [PATCH 26/45] Don't auto-import RaggedArray unless pandas is at least
 version 0.24.0

---
 datashader/__init__.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/datashader/__init__.py b/datashader/__init__.py
index 747f21161..0aeb61954 100644
--- a/datashader/__init__.py
+++ b/datashader/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import absolute_import
 
+from distutils.version import LooseVersion
+
 import param
 __version__ = str(param.version.Version(fpath=__file__, archive_commit="$Format:%h$",reponame="datashader"))
 
@@ -15,8 +17,11 @@
 except ImportError:
     pass
 
-# Make ragged pandas extension array available
-from . import datatypes
+# Make RaggedArray pandas extension array available for
+# pandas >= 0.24.0 is installed
+from pandas import __version__ as pandas_version
+if LooseVersion(pandas_version) >= LooseVersion('0.24.0'):
+    from . import datatypes
 
 # make pyct's example/data commands available if possible
 from functools import partial

From 04453ce3ea9a797731e6e8b1b5d3f34126db1677 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sun, 20 Jan 2019 05:38:06 -0500
Subject: [PATCH 27/45] wrap _compute_*_bounds static methods with
 compute_*_bounds methods

This allows subclasses to override how the DataFrame is used to compute the bounds
---
 datashader/dask.py   |  4 ++--
 datashader/glyphs.py | 18 ++++++++++++++++--
 datashader/pandas.py |  4 ++--
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/datashader/dask.py b/datashader/dask.py
index e39a8fecf..9babd1d22 100644
--- a/datashader/dask.py
+++ b/datashader/dask.py
@@ -31,8 +31,8 @@ def dask_pipeline(df, schema, canvas, glyph, summary):
 
 
 def shape_bounds_st_and_axis(df, canvas, glyph):
-    x_range = canvas.x_range or glyph._compute_x_bounds_dask(df)
-    y_range = canvas.y_range or glyph._compute_y_bounds_dask(df)
+    x_range = canvas.x_range or glyph.compute_x_bounds_dask(df)
+    y_range = canvas.y_range or glyph.compute_y_bounds_dask(df)
     x_min, x_max, y_min, y_max = bounds = compute(*(x_range + y_range))
     x_range, y_range = (x_min, x_max), (y_min, y_max)
 
diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index 213c5e90f..a769aed23 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -27,6 +27,12 @@ def validate(self, in_dshape):
         elif not isreal(in_dshape.measure[self.y]):
             raise ValueError('y must be real')
 
+    def compute_x_bounds(self, df):
+        return self._compute_x_bounds(df[self.x].values)
+
+    def compute_y_bounds(self, df):
+        return self._compute_y_bounds(df[self.y].values)
+
     @staticmethod
     @ngjit
     def _compute_x_bounds(xs):
@@ -68,7 +74,7 @@ def _compute_y_bounds(ys):
         return minval, maxval
 
     @memoize
-    def _compute_x_bounds_dask(self, df):
+    def compute_x_bounds_dask(self, df):
         """Like ``PointLike._compute_x_bounds``, but memoized because
         ``df`` is immutable/hashable (a Dask dataframe).
         """
@@ -85,7 +91,7 @@ def _compute_x_bounds_dask(self, df):
         
 
     @memoize
-    def _compute_y_bounds_dask(self, df):
+    def compute_y_bounds_dask(self, df):
         """Like ``PointLike._compute_y_bounds``, but memoized because
         ``df`` is immutable/hashable (a Dask dataframe).
         """
@@ -129,6 +135,14 @@ def validate(self, in_dshape):
             if not isreal(in_dshape.measure[col]):
                 raise ValueError('{} must be real'.format(col))
 
+    def compute_x_bounds(self, df):
+        xs = df[self.x].values
+        return self._compute_x_bounds(xs.reshape(np.prod(xs.shape)))
+
+    def compute_y_bounds(self, df):
+        ys = df[self.y].values
+        return self._compute_y_bounds(ys.reshape(np.prod(ys.shape)))
+
 
 class Point(_PointLike):
     """A point, with center at ``x`` and ``y``.
diff --git a/datashader/pandas.py b/datashader/pandas.py
index 191bbf38e..54f9ecc11 100644
--- a/datashader/pandas.py
+++ b/datashader/pandas.py
@@ -26,8 +26,8 @@ def pointlike(glyph, df, schema, canvas, summary):
     y_mapper = canvas.y_axis.mapper
     extend = glyph._build_extend(x_mapper, y_mapper, info, append)
 
-    x_range = canvas.x_range or glyph._compute_x_bounds(df[glyph.x].values)
-    y_range = canvas.y_range or glyph._compute_y_bounds(df[glyph.y].values)
+    x_range = canvas.x_range or glyph.compute_x_bounds(df)
+    y_range = canvas.y_range or glyph.compute_y_bounds(df)
 
     width = canvas.plot_width
     height = canvas.plot_height

From 642a8581b122350f74c73d137d6f97db5ba084be Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sun, 20 Jan 2019 05:52:31 -0500
Subject: [PATCH 28/45] Small refactor to remove the need for a specialized
 _PolygonLike glyph_dispatch

---
 datashader/glyphs.py |  4 +++-
 datashader/pandas.py | 28 ----------------------------
 2 files changed, 3 insertions(+), 29 deletions(-)

diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index a769aed23..375c3c65a 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -233,8 +233,10 @@ def _build_extend(self, x_mapper, y_mapper, info, append):
         draw_triangle, draw_triangle_interp = _build_draw_triangle(append)
         map_onto_pixel = _build_map_onto_pixel_for_triangle(x_mapper, y_mapper)
         extend_triangles = _build_extend_triangles(draw_triangle, draw_triangle_interp, map_onto_pixel)
+        weight_type = self.weight_type
+        interpolate = self.interpolate
 
-        def extend(aggs, df, vt, bounds, weight_type=True, interpolate=True):
+        def extend(aggs, df, vt, bounds, plot_start=True):
             cols = info(df)
             assert cols, 'There must be at least one column on which to aggregate'
             # mapped to pixels, then may be clipped
diff --git a/datashader/pandas.py b/datashader/pandas.py
index 54f9ecc11..e9000a1c8 100644
--- a/datashader/pandas.py
+++ b/datashader/pandas.py
@@ -42,31 +42,3 @@ def pointlike(glyph, df, schema, canvas, summary):
     extend(bases, df, x_st + y_st, x_range + y_range)
 
     return finalize(bases, coords=[y_axis, x_axis], dims=[glyph.y, glyph.x])
-
-
-
-@glyph_dispatch.register(_PolygonLike)
-def polygonlike(glyph, df, schema, canvas, summary):
-    create, info, append, _, finalize = compile_components(summary, schema, glyph)
-    x_mapper = canvas.x_axis.mapper
-    y_mapper = canvas.y_axis.mapper
-    extend = glyph._build_extend(x_mapper, y_mapper, info, append)
-
-    xs = df[glyph.x].values
-    x_range = canvas.x_range or glyph._compute_x_bounds(xs.reshape(np.prod(xs.shape)))
-    ys = df[glyph.y].values
-    y_range = canvas.y_range or glyph._compute_y_bounds(ys.reshape(np.prod(ys.shape)))
-
-    width = canvas.plot_width
-    height = canvas.plot_height
-
-    x_st = canvas.x_axis.compute_scale_and_translate(x_range, width)
-    y_st = canvas.y_axis.compute_scale_and_translate(y_range, height)
-
-    x_axis = canvas.x_axis.compute_index(x_st, width)
-    y_axis = canvas.y_axis.compute_index(y_st, height)
-
-    bases = create((height, width))
-    extend(bases, df, x_st + y_st, x_range + y_range, weight_type=glyph.weight_type, interpolate=glyph.interpolate)
-
-    return finalize(bases, coords=[y_axis, x_axis], dims=[glyph.y, glyph.x])

From 97bccf51faee70d1abdb4de8cde547d4d7d746dc Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sun, 20 Jan 2019 06:29:38 -0500
Subject: [PATCH 29/45] Refactor to extract required_columns glyph method

---
 datashader/core.py   | 7 +++----
 datashader/glyphs.py | 7 ++++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/datashader/core.py b/datashader/core.py
index 40c40beab..582189e68 100644
--- a/datashader/core.py
+++ b/datashader/core.py
@@ -541,10 +541,9 @@ def bypixel(source, canvas, glyph, agg):
 
 def _cols_to_keep(columns, glyph, agg):
     cols_to_keep = OrderedDict({col: False for col in columns})
-    cols_to_keep[glyph.x] = True
-    cols_to_keep[glyph.y] = True
-    if hasattr(glyph, 'z'):
-        cols_to_keep[glyph.z[0]] = True
+    for col in glyph.required_columns():
+        cols_to_keep[col] = True
+
     if hasattr(agg, 'values'):
         for subagg in agg.values:
             if subagg.column is not None:
diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index 375c3c65a..e2b925b6a 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -27,6 +27,9 @@ def validate(self, in_dshape):
         elif not isreal(in_dshape.measure[self.y]):
             raise ValueError('y must be real')
 
+    def required_columns(self):
+        return [self.x, self.y]
+
     def compute_x_bounds(self, df):
         return self._compute_x_bounds(df[self.x].values)
 
@@ -88,7 +91,6 @@ def compute_x_bounds_dask(self, df):
             #print("No x range; defaulting to x-1,x+1")
             minval, maxval = minval-1, minval+1
         return minval, maxval
-        
 
     @memoize
     def compute_y_bounds_dask(self, df):
@@ -135,6 +137,9 @@ def validate(self, in_dshape):
             if not isreal(in_dshape.measure[col]):
                 raise ValueError('{} must be real'.format(col))
 
+    def required_columns(self):
+        return [self.x, self.y] + list(self.z)
+
     def compute_x_bounds(self, df):
         xs = df[self.x].values
         return self._compute_x_bounds(xs.reshape(np.prod(xs.shape)))

From 2860511fcc1ef571d02734a0f6c2c4919e027327 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sun, 20 Jan 2019 07:26:07 -0500
Subject: [PATCH 30/45] Initial cvs.lines and LinesXY glyph

---
 datashader/core.py              | 39 +++++++++++++++++
 datashader/glyphs.py            | 78 +++++++++++++++++++++++++++++++++
 datashader/pandas.py            |  4 +-
 datashader/tests/test_pandas.py | 34 ++++++++++++++
 4 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/datashader/core.py b/datashader/core.py
index 582189e68..ed258bf07 100644
--- a/datashader/core.py
+++ b/datashader/core.py
@@ -187,6 +187,45 @@ def line(self, source, x, y, agg=None):
             agg = any_rdn()
         return bypixel(source, self, Line(x, y), agg)
 
+    def lines(self, source, x, y, agg=None,
+              x_constant=None, y_constant=None):
+        """Compute a reduction by pixel, mapping each row of source to pixels
+        in a distinct line
+
+        Parameters
+        ----------
+        source : pandas.DataFrame, dask.DataFrame, or xarray.DataArray/Dataset
+            The input datasource.
+        x, y: str or list
+            The x and y coordinates defining the line segments. Either xs and
+            ys are both strings or both lists.
+                * str: The name of a RaggedArray column in source that
+                       contains the x or y coordinates of the line segments.
+                * list: A list of the names of float or integer
+                               columns that contains the x or y coordinates of
+                               the line segment
+        agg : Reduction, optional
+            Reduction to compute. Default is ``any()``.
+        x_constant, y_constant: list or array of numbers
+            If xs is set to a list of column labels then y_constants may be
+            set to a list of numbers the same length as xs. These y
+            coordinates will be applied to every row.  Similarly, if ys is
+            a list of column labels, x_constants may be set to a list of
+            numbers to specify the x coordinates to be applied to every line
+            segment.
+
+            Exactly one of xs and x_constants may be specified and exactly
+            one of ys and y_constants may be specified.
+        """
+        from .glyphs import LinesXY
+        from .reductions import any as any_rdn
+        if agg is None:
+            agg = any_rdn()
+        return bypixel(source,
+                       self,
+                       LinesXY(tuple(x), tuple(y)),
+                       agg)
+
     # TODO re 'untested', below: Consider replacing with e.g. a 3x3
     # array in the call to Canvas (plot_height=3,plot_width=3), then
     # show the output as a numpy array that has a compact
diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index e2b925b6a..fbe44b202 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -27,6 +27,14 @@ def validate(self, in_dshape):
         elif not isreal(in_dshape.measure[self.y]):
             raise ValueError('y must be real')
 
+    @property
+    def x_label(self):
+        return self.x
+
+    @property
+    def y_label(self):
+        return self.y
+
     def required_columns(self):
         return [self.x, self.y]
 
@@ -225,6 +233,49 @@ def extend(aggs, df, vt, bounds, plot_start=True):
         return extend
 
 
+class LinesXY(_PointLike):
+    def validate(self, in_dshape):
+        # TODO
+        pass
+
+    def required_columns(self):
+        return self.x + self.y
+
+    @property
+    def x_label(self):
+        return 'x'
+
+    @property
+    def y_label(self):
+        return 'y'
+
+    def compute_x_bounds(self, df):
+        # return self._compute_x_bounds(df[self.x].values)
+        raise NotImplementedError()
+
+    def compute_y_bounds(self, df):
+        # return self._compute_y_bounds(df[self.y].values)
+        raise NotImplementedError()
+
+    @memoize
+    def _build_extend(self, x_mapper, y_mapper, info, append):
+        draw_line = _build_draw_line(append)
+        map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
+        extend_lines_xy = _build_extend_lines_xy(draw_line, map_onto_pixel)
+        x_names = self.x
+        y_names = self.y
+
+        def extend(aggs, df, vt, bounds, plot_start=True):
+            xs = tuple(df[x_name].values for x_name in x_names)
+            ys = tuple(df[y_name].values for y_name in y_names)
+
+            cols = aggs + info(df)
+            # line may be clipped, then mapped to pixels
+            extend_lines_xy(vt, bounds, xs, ys, plot_start, *cols)
+
+        return extend
+
+
 class Triangles(_PolygonLike):
     """An unstructured mesh of triangles, with vertices defined by ``xs`` and ``ys``.
 
@@ -465,6 +516,33 @@ def extend_line(vt, bounds, xs, ys, plot_start, *aggs_and_cols):
     return extend_line
 
 
+def _build_extend_lines_xy(draw_line, map_onto_pixel):
+    extend_line = _build_extend_line(draw_line, map_onto_pixel)
+
+    @ngjit
+    def extend_lines_xy(vt, bounds, xs, ys, plot_start, *aggs_and_cols):
+        """
+        here xs and ys are tuples of arrays and non-empty
+        """
+        cols = len(xs)
+        rows = len(xs[0])
+        line_xs = np.zeros(cols, dtype=xs[0].dtype)
+        line_ys = np.zeros(cols, dtype=xs[0].dtype)
+
+        for r in range(rows):
+            # populate line_xs/line_ys
+            for c in range(cols):
+                line_xs[c] = xs[c][r]
+                line_ys[c] = ys[c][r]
+
+            # extend line
+            extend_line(
+                vt, bounds, line_xs, line_ys, plot_start, *aggs_and_cols)
+
+    return extend_lines_xy
+
+
+
 def _build_draw_triangle(append):
     """Specialize a triangle plotting kernel for a given append/axis combination"""
     @ngjit
diff --git a/datashader/pandas.py b/datashader/pandas.py
index e9000a1c8..dffe377d8 100644
--- a/datashader/pandas.py
+++ b/datashader/pandas.py
@@ -41,4 +41,6 @@ def pointlike(glyph, df, schema, canvas, summary):
     bases = create((height, width))
     extend(bases, df, x_st + y_st, x_range + y_range)
 
-    return finalize(bases, coords=[y_axis, x_axis], dims=[glyph.y, glyph.x])
+    return finalize(bases,
+                    coords=[y_axis, x_axis],
+                    dims=[glyph.y_label, glyph.x_label])
diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py
index b5e2a0057..239bff407 100644
--- a/datashader/tests/test_pandas.py
+++ b/datashader/tests/test_pandas.py
@@ -593,3 +593,37 @@ def test_bug_570():
     yi, xi = np.where(agg.values == 1)
     assert np.array_equal(yi, np.arange(73, 300))
     assert np.array_equal(xi, np.array([590] * len(yi)))
+
+
+def test_lines_xy():
+    axis = ds.core.LinearAxis()
+    lincoords = axis.compute_index(axis.compute_scale_and_translate((-3., 3.), 7), 7)
+
+    df = pd.DataFrame({
+        'x0': [4, -4],
+        'x1': [0,  0],
+        'x2': [-4, 4],
+        'y0': [0,  0],
+        'y1': [-4, 4],
+        'y2': [0,  0]
+    })
+
+    cvs = ds.Canvas(plot_width=7, plot_height=7,
+                    x_range=(-3, 3), y_range=(-3, 3))
+
+    agg = cvs.lines(df,
+                    ['x0', 'x1', 'x2'],
+                    ['y0', 'y1', 'y2'],
+                    ds.count())
+
+    sol = np.array([[0, 0, 1, 0, 1, 0, 0],
+                    [0, 1, 0, 0, 0, 1, 0],
+                    [1, 0, 0, 0, 0, 0, 1],
+                    [0, 0, 0, 0, 0, 0, 0],
+                    [1, 0, 0, 0, 0, 0, 1],
+                    [0, 1, 0, 0, 0, 1, 0],
+                    [0, 0, 1, 0, 1, 0, 0]], dtype='i4')
+
+    out = xr.DataArray(sol, coords=[lincoords, lincoords],
+                       dims=['y', 'x'])
+    assert_eq(agg, out)
\ No newline at end of file

From d7cf092a2778b8e3fdb975a18302cdcf80d3982b Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sun, 20 Jan 2019 13:15:18 -0500
Subject: [PATCH 31/45] WIP of LinesRagged type

---
 datashader/core.py              | 28 +++++-----
 datashader/glyphs.py            | 94 +++++++++++++++++++++++++++++++++
 datashader/tests/test_pandas.py | 30 ++++++++++-
 3 files changed, 135 insertions(+), 17 deletions(-)

diff --git a/datashader/core.py b/datashader/core.py
index ed258bf07..3dc59aee6 100644
--- a/datashader/core.py
+++ b/datashader/core.py
@@ -7,6 +7,8 @@
 from xarray import DataArray, Dataset
 from collections import OrderedDict
 
+from datashader.datatypes import RaggedArray
+from datashader.glyphs import LinesRagged
 from .utils import Dispatcher, ngjit, calc_res, calc_bbox, orient_array, compute_coords
 from .utils import get_indices, dshape_from_pandas, dshape_from_dask
 from .utils import Expr # noqa (API import)
@@ -202,29 +204,23 @@ def lines(self, source, x, y, agg=None,
                 * str: The name of a RaggedArray column in source that
                        contains the x or y coordinates of the line segments.
                 * list: A list of the names of float or integer
-                               columns that contains the x or y coordinates of
-                               the line segment
+                        columns that contains the x or y coordinates of
+                        the line segment
         agg : Reduction, optional
             Reduction to compute. Default is ``any()``.
-        x_constant, y_constant: list or array of numbers
-            If xs is set to a list of column labels then y_constants may be
-            set to a list of numbers the same length as xs. These y
-            coordinates will be applied to every row.  Similarly, if ys is
-            a list of column labels, x_constants may be set to a list of
-            numbers to specify the x coordinates to be applied to every line
-            segment.
-
-            Exactly one of xs and x_constants may be specified and exactly
-            one of ys and y_constants may be specified.
         """
         from .glyphs import LinesXY
         from .reductions import any as any_rdn
         if agg is None:
             agg = any_rdn()
-        return bypixel(source,
-                       self,
-                       LinesXY(tuple(x), tuple(y)),
-                       agg)
+        # TODO: Check inputs and make LinesRagged, LinesX, LinesY
+
+        if isinstance(x, RaggedArray):
+            glyph = LinesRagged(x, y)
+        else:
+            glyph = LinesXY(tuple(x), tuple(y))
+
+        return bypixel(source, self, glyph, agg)
 
     # TODO re 'untested', below: Consider replacing with e.g. a 3x3
     # array in the call to Canvas (plot_height=3,plot_width=3), then
diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index fbe44b202..e99ad218c 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -276,6 +276,41 @@ def extend(aggs, df, vt, bounds, plot_start=True):
         return extend
 
 
+class LinesRagged(_PointLike):
+    def validate(self, in_dshape):
+        # TODO
+        pass
+
+    def required_columns(self):
+        return self.x + self.y
+
+    def compute_x_bounds(self, df):
+        # return self._compute_x_bounds(df[self.x].values)
+        raise NotImplementedError()
+
+    def compute_y_bounds(self, df):
+        # return self._compute_y_bounds(df[self.y].values)
+        raise NotImplementedError()
+
+    @memoize
+    def _build_extend(self, x_mapper, y_mapper, info, append):
+        draw_line = _build_draw_line(append)
+        map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
+        extend_lines_ragged = _build_extend_lines_ragged(draw_line, map_onto_pixel)
+        x_name = self.x
+        y_name = self.y
+
+        def extend(aggs, df, vt, bounds, plot_start=True):
+            xs = df[x_name].array
+            ys = df[y_name].array
+
+            cols = aggs + info(df)
+            # line may be clipped, then mapped to pixels
+            extend_lines_ragged(vt, bounds, xs, ys, plot_start, *cols)
+
+        return extend
+
+
 class Triangles(_PolygonLike):
     """An unstructured mesh of triangles, with vertices defined by ``xs`` and ``ys``.
 
@@ -542,6 +577,65 @@ def extend_lines_xy(vt, bounds, xs, ys, plot_start, *aggs_and_cols):
     return extend_lines_xy
 
 
+def _build_extend_lines_ragged(draw_line, map_onto_pixel):
+    extend_line = _build_extend_line(draw_line, map_onto_pixel)
+
+    def extend_lines_ragged(vt, bounds, xs, ys, plot_start, *aggs_and_cols):
+        """
+        here xs and ys are tuples of arrays and non-empty
+        """
+        x_start_indices = xs.start_indices
+        x_flat_array = xs.flat_array
+
+        y_start_indices = ys.start_indices
+        y_flat_array = ys.flat_array
+
+        perform_extend_lines_ragged(vt,
+                                    bounds,
+                                    x_start_indices,
+                                    x_flat_array,
+                                    y_start_indices,
+                                    y_flat_array,
+                                    plot_start,
+                                    *aggs_and_cols)
+
+    # @ngjit
+    def perform_extend_lines_ragged(vt,
+                                    bounds,
+                                    x_start_indices,
+                                    x_flat_array,
+                                    y_start_indices,
+                                    y_flat_array,
+                                    plot_start,
+                                    *aggs_and_cols):
+
+        x_flat_len = len(x_flat_array)
+        y_flat_len = len(y_flat_array)
+
+        rows = len(x_start_indices)
+        for r in range(rows):
+            # Get x index range
+            x_start_index = x_start_indices[r]
+            x_stop_index = (x_start_indices[r + 1]
+                            if r < rows - 1
+                            else x_flat_len)
+
+            # Get y index range
+            y_start_index = y_start_indices[r]
+            y_stop_index = (y_start_indices[r + 1]
+                            if r < rows - 1
+                            else y_flat_len)
+
+            # Build line slices
+            line_xs = x_flat_array[x_start_index:x_stop_index]
+            line_ys = y_flat_array[y_start_index:y_stop_index]
+
+            # Perform extend line
+            extend_line(
+                vt, bounds, line_xs, line_ys, plot_start, *aggs_and_cols)
+
+    return extend_lines_ragged
+
 
 def _build_draw_triangle(append):
     """Specialize a triangle plotting kernel for a given append/axis combination"""
diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py
index 239bff407..99458b71a 100644
--- a/datashader/tests/test_pandas.py
+++ b/datashader/tests/test_pandas.py
@@ -626,4 +626,32 @@ def test_lines_xy():
 
     out = xr.DataArray(sol, coords=[lincoords, lincoords],
                        dims=['y', 'x'])
-    assert_eq(agg, out)
\ No newline at end of file
+    assert_eq(agg, out)
+
+
+def test_lines_ragged():
+    axis = ds.core.LinearAxis()
+    lincoords = axis.compute_index(
+        axis.compute_scale_and_translate((-3., 3.), 7), 7)
+
+    df = pd.DataFrame({
+        'x': pd.array([[4, -4], [-4, 4, -4, 4]], dtype='Ragged[float32]'),
+        'y': pd.array([[0, 0], [-4, 4, 0, 0]], dtype='Ragged[float32]')
+    })
+
+    cvs = ds.Canvas(plot_width=7, plot_height=7,
+                    x_range=(-3, 3), y_range=(-3, 3))
+
+    agg = cvs.lines(df, 'x', 'y', ds.count())
+
+    sol = np.array([[0, 0, 1, 0, 1, 0, 0],
+                    [0, 1, 0, 0, 0, 1, 0],
+                    [1, 0, 0, 0, 0, 0, 1],
+                    [0, 0, 0, 0, 0, 0, 0],
+                    [1, 0, 0, 0, 0, 0, 1],
+                    [0, 1, 0, 0, 0, 1, 0],
+                    [0, 0, 1, 0, 1, 0, 0]], dtype='i4')
+
+    out = xr.DataArray(sol, coords=[lincoords, lincoords],
+                       dims=['y', 'x'])
+    assert_eq(agg, out)

From ea08fd1984846e8e739c58e367c61b4090a1632b Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 7 Feb 2019 18:34:38 -0500
Subject: [PATCH 32/45] Remove unused canvas.lines method

---
 datashader/core.py | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/datashader/core.py b/datashader/core.py
index a56e9c70b..cbaedd36a 100644
--- a/datashader/core.py
+++ b/datashader/core.py
@@ -294,38 +294,6 @@ def line(self, source, x, y, agg=None, axis=0):
 
         return bypixel(source, self, glyph, agg)
 
-    def lines(self, source, x, y, agg=None,
-              x_constant=None, y_constant=None):
-        """Compute a reduction by pixel, mapping each row of source to pixels
-        in a distinct line
-
-        Parameters
-        ----------
-        source : pandas.DataFrame, dask.DataFrame, or xarray.DataArray/Dataset
-            The input datasource.
-        x, y: str or list
-            The x and y coordinates defining the line segments. Either xs and
-            ys are both strings or both lists.
-                * str: The name of a RaggedArray column in source that
-                       contains the x or y coordinates of the line segments.
-                * list: A list of the names of float or integer
-                        columns that contains the x or y coordinates of
-                        the line segment
-        agg : Reduction, optional
-            Reduction to compute. Default is ``any()``.
-        """
-        from .glyphs import LinesXY
-        from .reductions import any as any_rdn
-        if agg is None:
-            agg = any_rdn()
-        # TODO: Check inputs and make LinesRagged, LinesX, LinesY
-
-        if isinstance(x, RaggedArray):
-            glyph = LinesRagged(x, y)
-        else:
-            glyph = LinesXY(tuple(x), tuple(y))
-
-        return bypixel(source, self, glyph, agg)
 
     # TODO re 'untested', below: Consider replacing with e.g. a 3x3
     # array in the call to Canvas (plot_height=3,plot_width=3), then

From 1b02b0d73edbd1eb8ce59a8622c9504485dafb09 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 07:58:37 -0500
Subject: [PATCH 33/45] Add RaggedArray line aggregation support for pandas

---
 datashader/core.py              | 40 +++++++++++++++----
 datashader/glyphs.py            | 70 ++++++++++++++++++++-------------
 datashader/tests/test_pandas.py | 45 ++++++++++++++-------
 datashader/utils.py             |  4 ++
 4 files changed, 110 insertions(+), 49 deletions(-)

diff --git a/datashader/core.py b/datashader/core.py
index cbaedd36a..911987780 100644
--- a/datashader/core.py
+++ b/datashader/core.py
@@ -10,8 +10,6 @@
 from xarray import DataArray, Dataset
 from collections import OrderedDict
 
-from datashader.datatypes import RaggedArray
-from datashader.glyphs import LinesRagged
 from .utils import Dispatcher, ngjit, calc_res, calc_bbox, orient_array, compute_coords
 from .utils import get_indices, dshape_from_pandas, dshape_from_dask
 from .utils import Expr # noqa (API import)
@@ -202,6 +200,7 @@ def line(self, source, x, y, agg=None, axis=0):
         Define a canvas and a pandas DataFrame with 6 rows
         >>> import pandas as pd  # doctest: +SKIP
         ... import numpy as np
+        ... import datashader as ds
         ... from datashader import Canvas
         ... import datashader.transfer_functions as tf
         ... cvs = Canvas()
@@ -214,23 +213,23 @@ def line(self, source, x, y, agg=None, axis=0):
 
         Aggregate one line across all rows, with coordinates df.A1 by df.B1
         >>> agg = cvs.line(df, x='A1', y='B1', axis=0)  # doctest: +SKIP
-        ... tf.shade(agg)
+        ... tf.spread(tf.shade(agg))
 
         Aggregate two lines across all rows. The first with coordinates
         df.A1 by df.B1 and the second with coordinates df.A2 by df.B2
         >>> agg = cvs.line(df, x=['A1', 'A2'], y=['B1', 'B2'], axis=0)  # doctest: +SKIP
-        ... tf.shade(agg)
+        ... tf.spread(tf.shade(agg))
 
         Aggregate two lines across all rows where the lines share the same
         x coordinates. The first line will have coordinates df.A1 by df.B1
         and the second will have coordinates df.A1 by df.B2
         >>> agg = cvs.line(df, x='A1', y=['B1', 'B2'], axis=0)  # doctest: +SKIP
-        ... tf.shade(agg)
+        ... tf.spread(tf.shade(agg))
 
         Aggregate 6 length-2 lines, one per row, where the ith line has
         coordinates [df.A1[i], df.A2[i]] by [df.B1[i], df.B2[i]]
         >>> agg = cvs.line(df, x=['A1', 'A2'], y=['B1', 'B2'], axis=1)  # doctest: +SKIP
-        ... tf.shade(agg)
+        ... tf.spread(tf.shade(agg))
 
         Aggregate 6 length-4 lines, one per row, where the x coordinates
         of every line are [0, 1, 2, 3] and the y coordinates of the ith line
@@ -239,10 +238,32 @@ def line(self, source, x, y, agg=None, axis=0):
         ...                x=np.arange(4),
         ...                y=['A1', 'A2', 'B1', 'B2'],
         ...                axis=1)
-        ... tf.shade(agg)
+        ... tf.spread(tf.shade(agg))
+
+        Aggregate RaggedArrays of variable length lines, one per row
+        (requires pandas >= 0.24.0)
+        >>> df_ragged = pd.DataFrame({  # doctest: +SKIP
+        ...    'A1': pd.array([
+        ...        [1, 1.5], [2, 2.5, 3], [1.5, 2, 3, 4], [3.2, 4, 5]],
+        ...        dtype='Ragged[float32]'),
+        ...    'B1': pd.array([
+        ...        [10, 12], [11, 14, 13], [10, 7, 9, 10], [7, 8, 12]],
+        ...        dtype='Ragged[float32]'),
+        ...    'group': pd.Categorical([0, 1, 2, 1])
+        ... })
+        ...
+        ... agg = cvs.line(df_ragged, x='A1', y='B1', axis=1)
+        ... tf.spread(tf.shade(agg))
+
+        Aggregate RaggedArrays of variable length lines by group column,
+        one per row (requires pandas >= 0.24.0)
+        >>> agg = cvs.line(df_ragged, x='A1', y='B1',  # doctest: +SKIP
+        ...                agg=ds.count_cat('group'), axis=1)
+        ... tf.spread(tf.shade(agg))
         """
         from .glyphs import (LineAxis0, LinesAxis1, LinesAxis1XConstant,
-                             LinesAxis1YConstant, LineAxis0Multi)
+                             LinesAxis1YConstant, LineAxis0Multi,
+                             LinesAxis1Ragged)
         from .reductions import any as any_rdn
         if agg is None:
             agg = any_rdn()
@@ -278,6 +299,9 @@ def line(self, source, x, y, agg=None, axis=0):
             elif (isinstance(x, (list, tuple)) and
                   isinstance(y, np.ndarray)):
                 glyph = LinesAxis1YConstant(tuple(x), y)
+            elif (isinstance(x, (Number, string_types)) and
+                    isinstance(y, (Number, string_types))):
+                glyph = LinesAxis1Ragged(x, y)
             else:
                 raise ValueError("""
 Invalid combination of x and y arguments to Canvas.line when axis=1.
diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index 6e5a04b8b..5d5ac4eac 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -469,7 +469,7 @@ def extend(aggs, df, vt, bounds, plot_start=True):
         return extend
 
 
-class LinesRagged(_PointLike):
+class LinesAxis1Ragged(_PointLike):
     def validate(self, in_dshape):
         # TODO
         pass
@@ -478,18 +478,18 @@ def required_columns(self):
         return self.x + self.y
 
     def compute_x_bounds(self, df):
-        # return self._compute_x_bounds(df[self.x].values)
-        raise NotImplementedError()
+        bounds = self._compute_x_bounds(df[self.x].array.flat_array)
+        return self.maybe_expand_bounds(bounds)
 
     def compute_y_bounds(self, df):
-        # return self._compute_y_bounds(df[self.y].values)
-        raise NotImplementedError()
+        bounds = self._compute_y_bounds(df[self.y].array.flat_array)
+        return self.maybe_expand_bounds(bounds)
 
     @memoize
     def _build_extend(self, x_mapper, y_mapper, info, append):
         draw_line = _build_draw_line(append)
         map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
-        extend_lines_ragged = _build_extend_lines_ragged(draw_line, map_onto_pixel)
+        extend_lines_ragged = _build_extend_line_axis1_ragged(draw_line, map_onto_pixel)
         x_name = self.x
         y_name = self.y
 
@@ -882,13 +882,9 @@ def extend_line(vt, bounds, xs, ys, plot_start, *aggs_and_cols):
     return extend_line
 
 
-def _build_extend_lines_ragged(draw_line, map_onto_pixel):
-    extend_line = _build_extend_line(draw_line, map_onto_pixel)
+def _build_extend_line_axis1_ragged(draw_line, map_onto_pixel):
 
-    def extend_lines_ragged(vt, bounds, xs, ys, plot_start, *aggs_and_cols):
-        """
-        here xs and ys are tuples of arrays and non-empty
-        """
+    def extend_line(vt, bounds, xs, ys, plot_start, *aggs_and_cols):
         x_start_indices = xs.start_indices
         x_flat_array = xs.flat_array
 
@@ -914,32 +910,52 @@ def perform_extend_lines_ragged(vt,
                                     plot_start,
                                     *aggs_and_cols):
 
+        nrows = len(x_start_indices)
         x_flat_len = len(x_flat_array)
         y_flat_len = len(y_flat_array)
 
-        rows = len(x_start_indices)
-        for r in range(rows):
+        i = 0
+        while i < nrows:
+            plot_start = True
+
             # Get x index range
-            x_start_index = x_start_indices[r]
-            x_stop_index = (x_start_indices[r + 1]
-                            if r < rows - 1
+            x_start_index = x_start_indices[i]
+            x_stop_index = (x_start_indices[i + 1]
+                            if i < nrows - 1
                             else x_flat_len)
 
             # Get y index range
-            y_start_index = y_start_indices[r]
-            y_stop_index = (y_start_indices[r + 1]
-                            if r < rows - 1
+            y_start_index = y_start_indices[i]
+            y_stop_index = (y_start_indices[i + 1]
+                            if i < nrows - 1
                             else y_flat_len)
 
-            # Build line slices
-            line_xs = x_flat_array[x_start_index:x_stop_index]
-            line_ys = y_flat_array[y_start_index:y_stop_index]
+            # Find line segment length as shorter of the two segments
+            segment_len = min(x_stop_index - x_start_index,
+                              y_stop_index - y_start_index)
+
+            j = 0
+            while j < segment_len - 1:
+
+                x0 = x_flat_array[x_start_index + j]
+                y0 = y_flat_array[y_start_index + j]
+                x1 = x_flat_array[x_start_index + j + 1]
+                y1 = y_flat_array[y_start_index + j + 1]
 
-            # Perform extend line
-            extend_line(
-                vt, bounds, line_xs, line_ys, plot_start, *aggs_and_cols)
+                x0, x1, y0, y1, skip, clipped, plot_start = \
+                    _skip_or_clip(x0, x1, y0, y1, bounds, plot_start)
 
-    return extend_lines_ragged
+                if not skip:
+                    x0i, y0i = map_onto_pixel(vt, bounds, x0, y0)
+                    x1i, y1i = map_onto_pixel(vt, bounds, x1, y1)
+                    draw_line(x0i, y0i, x1i, y1i, i, plot_start, clipped,
+                              *aggs_and_cols)
+                    plot_start = False
+
+                j += 1
+            i += 1
+
+    return extend_line
 
 
 def _build_draw_triangle(append):
diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py
index 4770afd54..0bb3acc50 100644
--- a/datashader/tests/test_pandas.py
+++ b/datashader/tests/test_pandas.py
@@ -642,6 +642,12 @@ def test_bug_570():
         'y0': [-4, 0, 4],
         'y1': [-4, 0, 4],
     }), ['x0', 'x1'], 'y0', 0),
+
+    # axis1 ragged arrays
+    (pd.DataFrame({
+        'x': pd.array([[4, 0], [0, -4, 0, 4]], dtype='Ragged[float32]'),
+        'y': pd.array([[0, -4], [-4, 0, 4, 0]], dtype='Ragged[float32]')
+    }), 'x', 'y', 1)
 ])
 def test_line_manual_range(df, x, y, ax):
     axis = ds.core.LinearAxis()
@@ -709,6 +715,12 @@ def test_line_manual_range(df, x, y, ax):
         'y0': [-4, 0, 4],
         'y1': [-4, 0, 4],
     }), ['x0', 'x1'], 'y0', 0),
+
+    # axis1 ragged arrays
+    (pd.DataFrame({
+        'x': pd.array([[0, -4, 0], [0,  4, 0]], dtype='Ragged[float32]'),
+        'y': pd.array([[-4, 0, 4], [-4, 0, 4]], dtype='Ragged[float32]')
+    }), 'x', 'y', 1)
 ])
 def test_line_autorange(df, x, y, ax):
     axis = ds.core.LinearAxis()
@@ -810,28 +822,33 @@ def test_line_agg_sum_axis1_none_constant():
     assert_eq(agg, out)
 
 
-def test_lines_ragged():
+def test_line_autorange_axis1_ragged():
     axis = ds.core.LinearAxis()
     lincoords = axis.compute_index(
-        axis.compute_scale_and_translate((-3., 3.), 7), 7)
+        axis.compute_scale_and_translate((-4., 4.), 9), 9)
 
     df = pd.DataFrame({
-        'x': pd.array([[4, -4], [-4, 4, -4, 4]], dtype='Ragged[float32]'),
-        'y': pd.array([[0, 0], [-4, 4, 0, 0]], dtype='Ragged[float32]')
+        'x': pd.array([[4, 0], [0, -4, 0, 4]], dtype='Ragged[float32]'),
+        'y': pd.array([[0, -4], [-4, 0, 4, 0]], dtype='Ragged[float32]')
     })
 
-    cvs = ds.Canvas(plot_width=7, plot_height=7,
-                    x_range=(-3, 3), y_range=(-3, 3))
+    cvs = ds.Canvas(plot_width=9, plot_height=9)
 
-    agg = cvs.lines(df, 'x', 'y', ds.count())
+    agg = cvs.line(df,
+                   'x',
+                   'y',
+                   ds.count(),
+                   axis=1)
 
-    sol = np.array([[0, 0, 1, 0, 1, 0, 0],
-                    [0, 1, 0, 0, 0, 1, 0],
-                    [1, 0, 0, 0, 0, 0, 1],
-                    [0, 0, 0, 0, 0, 0, 0],
-                    [1, 0, 0, 0, 0, 0, 1],
-                    [0, 1, 0, 0, 0, 1, 0],
-                    [0, 0, 1, 0, 1, 0, 0]], dtype='i4')
+    sol = np.array([[0, 0, 0, 0, 2, 0, 0, 0, 0],
+                    [0, 0, 0, 1, 0, 1, 0, 0, 0],
+                    [0, 0, 1, 0, 0, 0, 1, 0, 0],
+                    [0, 1, 0, 0, 0, 0, 0, 1, 0],
+                    [1, 0, 0, 0, 0, 0, 0, 0, 2],
+                    [0, 1, 0, 0, 0, 0, 0, 1, 0],
+                    [0, 0, 1, 0, 0, 0, 1, 0, 0],
+                    [0, 0, 0, 1, 0, 1, 0, 0, 0],
+                    [0, 0, 0, 0, 1, 0, 0, 0, 0]], dtype='i4')
 
     out = xr.DataArray(sol, coords=[lincoords, lincoords],
                        dims=['y', 'x'])
diff --git a/datashader/utils.py b/datashader/utils.py
index 4e44f293e..802be9276 100644
--- a/datashader/utils.py
+++ b/datashader/utils.py
@@ -12,6 +12,8 @@
 import dask.dataframe as dd
 import datashape
 
+from datashader.datatypes import RaggedDtype
+
 ngjit = nb.jit(nopython=True, nogil=True)
 
 
@@ -369,6 +371,8 @@ def dshape_from_pandas_helper(col):
             # Pandas stores this as a pytz.tzinfo, but DataShape wants a string
             tz = str(tz)
         return datashape.Option(datashape.DateTime(tz=tz))
+    elif isinstance(col.dtype, RaggedDtype):
+        return col.dtype
     dshape = datashape.CType.from_numpy_dtype(col.dtype)
     dshape = datashape.string if dshape == datashape.object_ else dshape
     if dshape in (datashape.string, datashape.datetime_):

From 23143110c2054ba7ca1e58b6e50d7bdb14bb0cda Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 08:19:55 -0500
Subject: [PATCH 34/45] Dask ragged array support

---
 datashader/datatypes.py       | 14 ++++++++++++++
 datashader/glyphs.py          | 18 ++++++++++++++++++
 datashader/tests/test_dask.py | 12 ++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index c671891b0..6ddca8396 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -10,6 +10,12 @@
 from pandas.api.types import pandas_dtype
 from pandas.core.dtypes.common import is_extension_array_dtype
 
+try:
+    # See if we can register extension type with dask >= 1.1.0
+    from dask.dataframe.extensions import make_array_nonempty
+except ImportError:
+    make_array_nonempty = None
+
 
 def _validate_ragged_properties(start_indices, flat_array):
     """
@@ -1002,3 +1008,11 @@ def _lexograph_lt(a1, a2):
         elif e1 > e2:
             return False
     return len(a1) < len(a2)
+
+
+def ragged_array_non_empty(dtype):
+    return RaggedArray([[1], [1, 2]], dtype=dtype)
+
+
+if make_array_nonempty:
+    make_array_nonempty.register(RaggedDtype)(ragged_array_non_empty)
diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index 5d5ac4eac..b41378e54 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -485,6 +485,24 @@ def compute_y_bounds(self, df):
         bounds = self._compute_y_bounds(df[self.y].array.flat_array)
         return self.maybe_expand_bounds(bounds)
 
+    @memoize
+    def compute_x_bounds_dask(self, df):
+        """Like ``PointLike._compute_x_bounds``, but memoized because
+        ``df`` is immutable/hashable (a Dask dataframe).
+        """
+        xs = df[self.x].compute().array.flat_array
+        minval, maxval = np.nanmin(xs), np.nanmax(xs)
+        return self.maybe_expand_bounds((minval, maxval))
+
+    @memoize
+    def compute_y_bounds_dask(self, df):
+        """Like ``PointLike._compute_y_bounds``, but memoized because
+        ``df`` is immutable/hashable (a Dask dataframe).
+        """
+        ys = df[self.y].compute().array.flat_array
+        minval, maxval = np.nanmin(ys), np.nanmax(ys)
+        return self.maybe_expand_bounds((minval, maxval))
+
     @memoize
     def _build_extend(self, x_mapper, y_mapper, info, append):
         draw_line = _build_draw_line(append)
diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py
index 2c0780766..e45b1a6ef 100644
--- a/datashader/tests/test_dask.py
+++ b/datashader/tests/test_dask.py
@@ -324,6 +324,12 @@ def test_line():
         'y1': [0,  4,  0],
         'y2': [0,  0,  0]
     }), 'x0', ['y0', 'y1', 'y2'], 0),
+
+    # axis1 RaggedArray
+    (pd.DataFrame({
+        'x': [[4, 0, -4], [-4, 0, 4, 4, 0, -4]],
+        'y': [[0, -4, 0], [0, 4, 0, 0, 0, 0]],
+    }, dtype='Ragged[int64]'), 'x', 'y', 1),
 ])
 def test_line_manual_range(df, x, y, ax):
     axis = ds.core.LinearAxis()
@@ -391,6 +397,12 @@ def test_line_manual_range(df, x, y, ax):
         'y0': [-4, 0,  4]
     }), ['x0', 'x1', 'x2'], 'y0', 0),
 
+    # axis1 RaggedArray
+    (pd.DataFrame({
+        'x': [[0, -4, 0], [0, 0, 0], [0, 4, 0]],
+        'y': [[-4, 0, 4], [4, 0, -4], [-4, 0, 4]],
+    }, dtype='Ragged[int64]'), 'x', 'y', 1),
+
 ])
 def test_line_autorange(df, x, y, ax):
     axis = ds.core.LinearAxis()

From f4a40ebd193393dd618ced7de4f7fadd54549a8a Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 08:23:21 -0500
Subject: [PATCH 35/45] flake8

---
 datashader/__init__.py             | 2 +-
 datashader/datatypes.py            | 1 -
 datashader/tests/test_datatypes.py | 6 +++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/datashader/__init__.py b/datashader/__init__.py
index 0aeb61954..da620d0a9 100644
--- a/datashader/__init__.py
+++ b/datashader/__init__.py
@@ -21,7 +21,7 @@
 # pandas >= 0.24.0 is installed
 from pandas import __version__ as pandas_version
 if LooseVersion(pandas_version) >= LooseVersion('0.24.0'):
-    from . import datatypes
+    from . import datatypes  # noqa (API import)
 
 # make pyct's example/data commands available if possible
 from functools import partial
diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 6ddca8396..709567f2e 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -525,7 +525,6 @@ def fillna(self, value=None, method=None, limit=None):
         filled : ExtensionArray with NA/NaN filled
         """
         # Override in RaggedArray to handle ndarray fill values
-        from pandas.api.types import is_array_like
         from pandas.util._validators import validate_fillna_kwargs
         from pandas.core.missing import pad_1d, backfill_1d
 
diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 90380f841..e770046bc 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -240,8 +240,8 @@ def test_get_item_scalar():
 @pytest.mark.parametrize('index', [-1000, -6, 5, 1000])
 def test_get_item_scalar_out_of_bounds(index):
     rarray = RaggedArray([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]])
-    with pytest.raises(IndexError) as e:
-        result = rarray[index]
+    with pytest.raises(IndexError):
+        rarray[index]
 
 
 def test_get_item_slice():
@@ -511,7 +511,7 @@ def test_equality_validation(other):
 
     # invalid scalar
     with pytest.raises(ValueError, match="Cannot check equality"):
-        res = ra1 == other
+        ra1 == other
 
 
 # Pandas-provided extension array tests

From 59b0b3a5c620231f5a036e17acc3025507f1a9aa Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 08:37:38 -0500
Subject: [PATCH 36/45] Add validation for LinesAxis1Ragged

---
 datashader/glyphs.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index b526d7140..95e56e7c6 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -472,8 +472,15 @@ def extend(aggs, df, vt, bounds, plot_start=True):
 
 class LinesAxis1Ragged(_PointLike):
     def validate(self, in_dshape):
-        # TODO
-        pass
+        try:
+            from datashader.datatypes import RaggedDtype
+        except ImportError:
+            RaggedDtype = type(None)
+
+        if not isinstance(in_dshape[str(self.x)], RaggedDtype):
+            raise ValueError('x must be a RaggedArray')
+        elif not isinstance(in_dshape[str(self.y)], RaggedDtype):
+            raise ValueError('y must be a RaggedArray')
 
     def required_columns(self):
         return self.x + self.y

From c48429e328e3a58103a1b8f26b2290dbf6ef9a1e Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 08:38:05 -0500
Subject: [PATCH 37/45] Exception handling on import for pandas < 0.24

---
 datashader/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datashader/utils.py b/datashader/utils.py
index 614b14572..fb660b17b 100644
--- a/datashader/utils.py
+++ b/datashader/utils.py
@@ -12,7 +12,10 @@
 import dask.dataframe as dd
 import datashape
 
-from datashader.datatypes import RaggedDtype
+try:
+    from datashader.datatypes import RaggedDtype
+except ImportError:
+    RaggedDtype = type(None)
 
 ngjit = nb.jit(nopython=True, nogil=True)
 

From cdecd85a372a2935c5cff2db1c7cf1380660ffc2 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 08:41:28 -0500
Subject: [PATCH 38/45] Add pandas >=0.24.1 as testing dependency so that we
 can test RaggedArray support

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index a3fa674cb..d3a7b8685 100644
--- a/setup.py
+++ b/setup.py
@@ -36,6 +36,7 @@
         'flake8',
         'nbsmoke >=0.2.6',
         'fastparquet >=0.1.6',  # optional dependency
+        'pandas >=0.24.1',  # optional ragged array support
     ],
     'examples': [],
     'examples_extra':[

From 7c8b953241813070e3809e557ef07dabe48798de Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 09:46:32 -0500
Subject: [PATCH 39/45] absolute import

---
 datashader/datatypes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 709567f2e..48eefed19 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 import re
 from functools import total_ordering
 

From c846f0c2b4d898e6fce3c4a3e9878b98df55beca Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 8 Feb 2019 19:32:20 -0500
Subject: [PATCH 40/45] specify that int lists should cast to int64 numpy
 arrays

To address AppVeyor failures
---
 datashader/tests/test_datatypes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index e770046bc..cb0520638 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -182,7 +182,7 @@ def test_start_indices_dtype():
 
 
 @pytest.mark.parametrize('arg,expected', [
-    ([[1, 2]], 'int64'),
+    ([np.array([1, 2], dtype='int64')], 'int64'),
     ([[True], [False, True]], 'bool'),
     (np.array([np.array([1, 2], dtype='int8'),
                np.array([1, 2], dtype='int32')]), 'int32'),
@@ -386,7 +386,7 @@ def test_pandas_array_construction():
     arg = [[0, 1], [1, 2, 3, 4], None, [-1, -2]] * 2
     ra = pd.array(arg, dtype='ragged[int64]')
 
-    expected = RaggedArray(arg)
+    expected = RaggedArray(arg, dtype='int64')
     assert_ragged_arrays_equal(ra, expected)
 
 

From cad7d0a7fbea06b394005172b9456008df21274a Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sat, 23 Feb 2019 19:26:19 -0500
Subject: [PATCH 41/45] Remove parameterized args from skipped tests

No reason to skip every combination, and this was causing pytest-xdist
to throw an internal error when running tests in parallel
---
 datashader/tests/test_datatypes.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index cb0520638..3af4b5a50 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -712,9 +712,9 @@ def test_array_interface(self, data):
 
 class TestRaggedMethods(eb.BaseMethodsTests):
 
-    # AttributeError: 'RaggedArray' object has no attribute 'value_counts'
+    # # AttributeError: 'RaggedArray' object has no attribute 'value_counts'
     @pytest.mark.skip(reason="value_counts not supported")
-    def test_value_counts(self, all_data, dropna):
+    def test_value_counts(self):
         pass
 
     # Add array equality
@@ -732,26 +732,26 @@ def test_unique(self, data, box, method):
     # Pandas raises
     #   ValueError: invalid fill value with a <class 'numpy.ndarray'>
     @pytest.mark.skip(reason="pandas cannot fill with ndarray")
-    def test_fillna_copy_frame(self, data_missing):
+    def test_fillna_copy_frame(self):
         pass
 
     @pytest.mark.skip(reason="pandas cannot fill with ndarray")
-    def test_fillna_copy_series(self, data_missing):
+    def test_fillna_copy_series(self):
         pass
 
     # Ragged array elements don't support binary operators
     @pytest.mark.skip(reason="ragged does not support <= on elements")
-    def test_combine_le(self, data_repeated):
+    def test_combine_le(self):
         pass
 
     @pytest.mark.skip(reason="ragged does not support + on elements")
-    def test_combine_add(self, data_repeated):
+    def test_combine_add(self):
         pass
 
     # Block manager error:
     #   ValueError: setting an array element with a sequence.
     @pytest.mark.skip(reason="combine_first not supported")
-    def test_combine_first(self, data):
+    def test_combine_first(self):
         pass
 
 
@@ -764,11 +764,11 @@ class TestRaggedMissing(eb.BaseMissingTests):
     # Errors like:
     #   ValueError: invalid fill value with a <class 'numpy.ndarray'>
     @pytest.mark.skip(reason="Can't fill with ndarray")
-    def test_fillna_series(self, data_missing):
+    def test_fillna_series(self):
         pass
 
     @pytest.mark.skip(reason="Can't fill with ndarray")
-    def test_fillna_frame(self, data_missing):
+    def test_fillna_frame(self):
         pass
 
 

From 89d1d51172ab1106edc869dbb92d79888853a87f Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Sat, 23 Feb 2019 19:27:30 -0500
Subject: [PATCH 42/45] Add Dask optimized bounds calculations for ragged list
 glyph

---
 datashader/glyphs.py | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/datashader/glyphs.py b/datashader/glyphs.py
index 03b437195..605f10c5d 100644
--- a/datashader/glyphs.py
+++ b/datashader/glyphs.py
@@ -505,22 +505,20 @@ def compute_y_bounds(self, df):
         return self.maybe_expand_bounds(bounds)
 
     @memoize
-    def compute_x_bounds_dask(self, df):
-        """Like ``PointLike._compute_x_bounds``, but memoized because
-        ``df`` is immutable/hashable (a Dask dataframe).
-        """
-        xs = df[self.x].compute().array.flat_array
-        minval, maxval = np.nanmin(xs), np.nanmax(xs)
-        return self.maybe_expand_bounds((minval, maxval))
+    def compute_bounds_dask(self, ddf):
 
-    @memoize
-    def compute_y_bounds_dask(self, df):
-        """Like ``PointLike._compute_y_bounds``, but memoized because
-        ``df`` is immutable/hashable (a Dask dataframe).
-        """
-        ys = df[self.y].compute().array.flat_array
-        minval, maxval = np.nanmin(ys), np.nanmax(ys)
-        return self.maybe_expand_bounds((minval, maxval))
+        r = ddf.map_partitions(lambda df: np.array([[
+            np.nanmin(df[self.x].array.flat_array),
+            np.nanmax(df[self.x].array.flat_array),
+            np.nanmin(df[self.y].array.flat_array),
+            np.nanmax(df[self.y].array.flat_array)]]
+        )).compute()
+
+        x_extents = np.nanmin(r[:, 0]), np.nanmax(r[:, 1])
+        y_extents = np.nanmin(r[:, 2]), np.nanmax(r[:, 3])
+
+        return (self.maybe_expand_bounds(x_extents),
+                self.maybe_expand_bounds(y_extents))
 
     @memoize
     def _build_extend(self, x_mapper, y_mapper, info, append):

From 92eaab2123f61fb76f628148d9f43008ba89fba3 Mon Sep 17 00:00:00 2001
From: "James A. Bednar" <jbednar@users.noreply.github.com>
Date: Thu, 28 Feb 2019 05:51:14 -0500
Subject: [PATCH 43/45] Apply suggestions from code review

Co-Authored-By: jonmmease <jon.mease@gmail.com>
---
 datashader/datatypes.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 48eefed19..9d8e3c356 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -20,7 +20,7 @@
 
 def _validate_ragged_properties(start_indices, flat_array):
     """
-    Validate that start_indices are flat_array arrays may be used to
+    Validate that start_indices and flat_array are arrays that may be used to
     represent a valid RaggedArray.
 
     Parameters
@@ -456,14 +456,14 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
     @classmethod
     def _from_factorized(cls, values, original):
         """
-        Reconstruct an ExtensionArray after factorization.
+        Reconstruct a RaggedArray after factorization.
 
         Parameters
         ----------
         values : ndarray
             An integer ndarray with the factorized values.
         original : RaggedArray
-            The original ExtensionArray that factorize was called on.
+            The original RaggedArray that factorize was called on.
 
         See Also
         --------
@@ -921,7 +921,7 @@ def _eq_ragged_ndarray1d(start_indices, flat_array, a):
 
     Notes
     -----
-    This function is not numba accelerated because it, but design, inputs
+    This function is not numba accelerated because it, by design, inputs
     a numpy object array
     """
 
@@ -962,7 +962,7 @@ def _eq_ragged_ndarray2d(start_indices, flat_array, a):
     -------
     mask: ndarray
         1D bool array of same length as input RaggedArray with elements True
-        when corresponding elements of ra equals corresponding row of a
+        when corresponding elements of ra equal corresponding row of `a`
     """
     n = len(start_indices)
     m = len(flat_array)

From 1538909bd15721f7184fc0228a59733cf2a2bbf4 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Thu, 28 Feb 2019 06:09:56 -0500
Subject: [PATCH 44/45] Refer to parent docstrings rather than duplicate

---
 datashader/datatypes.py | 236 ++++++----------------------------------
 1 file changed, 35 insertions(+), 201 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 9d8e3c356..7e1d348dc 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -123,6 +123,9 @@ class RaggedDtype(ExtensionDtype):
 
     @property
     def name(self):
+        """
+        See docstring for ExtensionDtype.name
+        """
         return 'Ragged[{subtype}]'.format(subtype=self.subtype)
 
     def __repr__(self):
@@ -130,10 +133,16 @@ def __repr__(self):
 
     @classmethod
     def construct_array_type(cls):
+        """
+        See docstring for ExtensionDtype.construct_array_type
+        """
         return RaggedArray
 
     @classmethod
     def construct_from_string(cls, string):
+        """
+        See docstring for ExtensionDtype.construct_from_string
+        """
         # lowercase string
         string = string.lower()
 
@@ -372,25 +381,13 @@ def start_indices(self):
 
     def __len__(self):
         """
-        Length of this array
-
-        Returns
-        -------
-        length : int
+        See docstring for ExtensionArray.__len__
         """
         return len(self._start_indices)
 
     def __getitem__(self, item):
         """
-        Parameters
-        ----------
-        item : int, slice, or ndarray
-            * int: The position in 'self' to get.
-
-            * slice: A slice object, where 'start', 'stop', and 'step' are
-              integers or None
-
-            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
+        See docstring for ExtensionArray.__getitem__
         """
         if isinstance(item, Integral):
             if item < -len(self) or item >= len(self):
@@ -434,41 +431,14 @@ def __getitem__(self, item):
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         """
-        Construct a new RaggedArray from a sequence of scalars.
-
-        Parameters
-        ----------
-        scalars : Sequence
-            Each element will be an instance of the scalar type for this
-            array, ``cls.dtype.type``.
-        dtype : dtype, optional
-            Construct for this particular dtype. This should be a Dtype
-            compatible with the ExtensionArray.
-        copy : boolean, default False
-            If True, copy the underlying data.
-
-        Returns
-        -------
-        RaggedArray
+        See docstring for ExtensionArray._from_sequence
         """
         return RaggedArray(scalars, dtype=dtype)
 
     @classmethod
     def _from_factorized(cls, values, original):
         """
-        Reconstruct a RaggedArray after factorization.
-
-        Parameters
-        ----------
-        values : ndarray
-            An integer ndarray with the factorized values.
-        original : RaggedArray
-            The original RaggedArray that factorize was called on.
-
-        See Also
-        --------
-        pandas.factorize
-        ExtensionArray.factorize
+        See docstring for ExtensionArray._from_factorized
         """
         return RaggedArray(
             [_RaggedElement.array_or_nan(v) for v in values],
@@ -479,18 +449,20 @@ def _as_ragged_element_array(self):
                          for i in range(len(self))])
 
     def _values_for_factorize(self):
+        """
+        See docstring for ExtensionArray._values_for_factorize
+        """
         return self._as_ragged_element_array(), np.nan
 
     def _values_for_argsort(self):
+        """
+        See docstring for ExtensionArray._values_for_argsort
+        """
         return self._as_ragged_element_array()
 
     def unique(self):
         """
-        Compute the ExtensionArray of unique values.
-
-        Returns
-        -------
-        uniques : ExtensionArray
+        See docstring for ExtensionArray.unique
         """
         from pandas import unique
 
@@ -501,29 +473,7 @@ def unique(self):
 
     def fillna(self, value=None, method=None, limit=None):
         """
-        Fill NA/NaN values using the specified method.
-
-        Parameters
-        ----------
-        value : scalar, array-like
-            If a scalar value is passed it is used to fill all missing values.
-            Alternatively, an array-like 'value' can be given. It's expected
-            that the array-like have the same length as 'self'.
-        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
-            Method to use for filling holes in reindexed Series
-            pad / ffill: propagate last valid observation forward to next valid
-            backfill / bfill: use NEXT valid observation to fill gap
-        limit : int, default None
-            If method is specified, this is the maximum number of consecutive
-            NaN values to forward/backward fill. In other words, if there is
-            a gap with more than this number of consecutive NaNs, it will only
-            be partially filled. If method is not specified, this is the
-            maximum number of entries along the entire axis where NaNs will be
-            filled.
-
-        Returns
-        -------
-        filled : ExtensionArray with NA/NaN filled
+        See docstring for ExtensionArray.fillna
         """
         # Override in RaggedArray to handle ndarray fill values
         from pandas.util._validators import validate_fillna_kwargs
@@ -560,37 +510,7 @@ def fillna(self, value=None, method=None, limit=None):
     def shift(self, periods=1, fill_value=None):
         # type: (int, object) -> ExtensionArray
         """
-        Shift values by desired number.
-
-        Newly introduced missing values are filled with
-        ``self.dtype.na_value``.
-
-        .. versionadded:: 0.24.0
-
-        Parameters
-        ----------
-        periods : int, default 1
-            The number of periods to shift. Negative values are allowed
-            for shifting backwards.
-
-        fill_value : object, optional
-            The scalar value to use for newly introduced missing values.
-            The default is ``self.dtype.na_value``
-
-            .. versionadded:: 0.24.0
-
-        Returns
-        -------
-        shifted : ExtensionArray
-
-        Notes
-        -----
-        If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
-        returned.
-
-        If ``periods > len(self)``, then an array of size
-        len(self) is returned, with all values filled with
-        ``self.dtype.na_value``.
+        See docstring for ExtensionArray.shift
         """
         # Override in RaggedArray to handle ndarray fill values
 
@@ -616,49 +536,8 @@ def shift(self, periods=1, fill_value=None):
 
     def searchsorted(self, value, side="left", sorter=None):
         """
-        Find indices where elements should be inserted to maintain order.
-
-        .. versionadded:: 0.24.0
-
-        Find the indices into a sorted array `self` (a) such that, if the
-        corresponding elements in `v` were inserted before the indices, the
-        order of `self` would be preserved.
-
-        Assuming that `a` is sorted:
-
-        ======  ============================
-        `side`  returned index `i` satisfies
-        ======  ============================
-        left    ``self[i-1] < v <= self[i]``
-        right   ``self[i-1] <= v < self[i]``
-        ======  ============================
-
-        Parameters
-        ----------
-        value : array_like
-            Values to insert into `self`.
-        side : {'left', 'right'}, optional
-            If 'left', the index of the first suitable location found is given.
-            If 'right', return the last such index.  If there is no suitable
-            index, return either 0 or N (where N is the length of `self`).
-        sorter : 1-D array_like, optional
-            Optional array of integer indices that sort array a into ascending
-            order. They are typically the result of argsort.
-
-        Returns
-        -------
-        indices : array of ints
-            Array of insertion points with the same shape as `value`.
-
-        See Also
-        --------
-        numpy.searchsorted : Similar method from NumPy.
-        """
-        # Note: the base tests provided by pandas only test the basics.
-        # We do not test
-        # 1. Values outside the range of the `data_for_sorting` fixture
-        # 2. Values between the values in the `data_for_sorting` fixture
-        # 3. Missing values.
+        See docstring for ExtensionArray.searchsorted
+        """
         arr = self._as_ragged_element_array()
         if isinstance(value, RaggedArray):
             search_value = value._as_ragged_element_array()
@@ -668,13 +547,7 @@ def searchsorted(self, value, side="left", sorter=None):
 
     def isna(self):
         """
-        A 1-D array indicating if each value is missing.
-
-        Returns
-        -------
-        na_values : np.ndarray
-            boolean ndarray the same length as the ragged array where values
-            of True represent missing/NA values.
+        See docstring for ExtensionArray.isna
         """
         stop_indices = np.hstack([self.start_indices[1:],
                                   [len(self.flat_array)]])
@@ -684,34 +557,7 @@ def isna(self):
 
     def take(self, indices, allow_fill=False, fill_value=None):
         """
-        Take elements from an array.
-
-        Parameters
-        ----------
-        indices : sequence of integers
-            Indices to be taken.
-        allow_fill : bool, default False
-            How to handle negative values in `indices`.
-
-            * False: negative values in `indices` indicate positional indices
-              from the right (the default). This is similar to
-              :func:`numpy.take`.
-
-            * True: negative values in `indices` indicate
-              missing values. These values are set to `fill_value`. Any other
-              other negative values raise a ``ValueError``.
-
-        fill_value : any, default None
-            Fill value to use for NA-indices when `allow_fill` is True.
-
-        Returns
-        -------
-        RaggedArray
-
-        Raises
-        ------
-        IndexError
-            When the indices are out of bounds for the array.
+        See docstring for ExtensionArray.take
         """
         if allow_fill:
             invalid_inds = [i for i in indices if i < -1]
@@ -731,16 +577,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
 
     def copy(self, deep=False):
         """
-        Return a copy of the array.
-
-        Parameters
-        ----------
-        deep : bool, default False
-            Also copy the underlying data backing this array.
-
-        Returns
-        -------
-        RaggedArray
+        See docstring for ExtensionArray.copy
         """
         data = dict(
             flat_array=self.flat_array,
@@ -751,15 +588,7 @@ def copy(self, deep=False):
     @classmethod
     def _concat_same_type(cls, to_concat):
         """
-        Concatenate multiple RaggedArray instances
-
-        Parameters
-        ----------
-        to_concat : list of RaggedArray
-
-        Returns
-        -------
-        RaggedArray
+        See docstring for ExtensionArray._concat_same_type
         """
         # concat flat_arrays
         flat_array = np.hstack(ra.flat_array for ra in to_concat)
@@ -778,18 +607,23 @@ def _concat_same_type(cls, to_concat):
 
     @property
     def dtype(self):
+        """
+        See docstring for ExtensionArray.dtype
+        """
         return self._dtype
 
     @property
     def nbytes(self):
         """
-        The number of bytes needed to store this object in memory.
+        See docstring for ExtensionArray.nbytes
         """
         return (self._flat_array.nbytes +
                 self._start_indices.nbytes)
 
     def astype(self, dtype, copy=True):
-
+        """
+        See docstring for ExtensionArray.astype
+        """
         dtype = pandas_dtype(dtype)
         if isinstance(dtype, RaggedDtype):
             if copy:

From c42f0df68dcfbf8715358194bba11a394f788c86 Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Fri, 1 Mar 2019 07:27:12 -0500
Subject: [PATCH 45/45] Remove docstring references

---
 datashader/datatypes.py | 73 +++++++----------------------------------
 1 file changed, 12 insertions(+), 61 deletions(-)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 7e1d348dc..c552860bd 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -116,6 +116,12 @@ def __repr__(self):
 
 @register_extension_dtype
 class RaggedDtype(ExtensionDtype):
+    """
+    Pandas ExtensionDtype to represent a ragged array datatype
+
+    Methods not otherwise documented here are inherited from ExtensionDtype;
+    please see the corresponding method on that class for the docstring
+    """
     type = np.ndarray
     base = np.dtype('O')
     _subtype_re = re.compile(r"^ragged\[(?P<subtype>\w+)\]$")
@@ -123,9 +129,6 @@ class RaggedDtype(ExtensionDtype):
 
     @property
     def name(self):
-        """
-        See docstring for ExtensionDtype.name
-        """
         return 'Ragged[{subtype}]'.format(subtype=self.subtype)
 
     def __repr__(self):
@@ -133,16 +136,10 @@ def __repr__(self):
 
     @classmethod
     def construct_array_type(cls):
-        """
-        See docstring for ExtensionDtype.construct_array_type
-        """
         return RaggedArray
 
     @classmethod
     def construct_from_string(cls, string):
-        """
-        See docstring for ExtensionDtype.construct_from_string
-        """
         # lowercase string
         string = string.lower()
 
@@ -205,6 +202,12 @@ def missing(v):
 
 
 class RaggedArray(ExtensionArray):
+    """
+    Pandas ExtensionArray to represent ragged arrays
+
+    Methods not otherwise documented here are inherited from ExtensionArray;
+    please see the corresponding method on that class for the docstring
+    """
     def __init__(self, data, dtype=None, copy=False):
         """
         Construct a RaggedArray
@@ -380,15 +383,9 @@ def start_indices(self):
         return self._start_indices
 
     def __len__(self):
-        """
-        See docstring for ExtensionArray.__len__
-        """
         return len(self._start_indices)
 
     def __getitem__(self, item):
-        """
-        See docstring for ExtensionArray.__getitem__
-        """
         if isinstance(item, Integral):
             if item < -len(self) or item >= len(self):
                 raise IndexError("{item} is out of bounds".format(item=item))
@@ -430,16 +427,10 @@ def __getitem__(self, item):
 
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
-        """
-        See docstring for ExtensionArray._from_sequence
-        """
         return RaggedArray(scalars, dtype=dtype)
 
     @classmethod
     def _from_factorized(cls, values, original):
-        """
-        See docstring for ExtensionArray._from_factorized
-        """
         return RaggedArray(
             [_RaggedElement.array_or_nan(v) for v in values],
             dtype=original.flat_array.dtype)
@@ -449,21 +440,12 @@ def _as_ragged_element_array(self):
                          for i in range(len(self))])
 
     def _values_for_factorize(self):
-        """
-        See docstring for ExtensionArray._values_for_factorize
-        """
         return self._as_ragged_element_array(), np.nan
 
     def _values_for_argsort(self):
-        """
-        See docstring for ExtensionArray._values_for_argsort
-        """
         return self._as_ragged_element_array()
 
     def unique(self):
-        """
-        See docstring for ExtensionArray.unique
-        """
         from pandas import unique
 
         uniques = unique(self._as_ragged_element_array())
@@ -472,9 +454,6 @@ def unique(self):
             dtype=self.dtype)
 
     def fillna(self, value=None, method=None, limit=None):
-        """
-        See docstring for ExtensionArray.fillna
-        """
         # Override in RaggedArray to handle ndarray fill values
         from pandas.util._validators import validate_fillna_kwargs
         from pandas.core.missing import pad_1d, backfill_1d
@@ -508,10 +487,6 @@ def fillna(self, value=None, method=None, limit=None):
         return new_values
 
     def shift(self, periods=1, fill_value=None):
-        # type: (int, object) -> ExtensionArray
-        """
-        See docstring for ExtensionArray.shift
-        """
         # Override in RaggedArray to handle ndarray fill values
 
         # Note: this implementation assumes that `self.dtype.na_value` can be
@@ -535,9 +510,6 @@ def shift(self, periods=1, fill_value=None):
         return self._concat_same_type([a, b])
 
     def searchsorted(self, value, side="left", sorter=None):
-        """
-        See docstring for ExtensionArray.searchsorted
-        """
         arr = self._as_ragged_element_array()
         if isinstance(value, RaggedArray):
             search_value = value._as_ragged_element_array()
@@ -546,9 +518,6 @@ def searchsorted(self, value, side="left", sorter=None):
         return arr.searchsorted(search_value, side=side, sorter=sorter)
 
     def isna(self):
-        """
-        See docstring for ExtensionArray.isna
-        """
         stop_indices = np.hstack([self.start_indices[1:],
                                   [len(self.flat_array)]])
 
@@ -556,9 +525,6 @@ def isna(self):
         return element_lengths == 0
 
     def take(self, indices, allow_fill=False, fill_value=None):
-        """
-        See docstring for ExtensionArray.take
-        """
         if allow_fill:
             invalid_inds = [i for i in indices if i < -1]
             if invalid_inds:
@@ -576,9 +542,6 @@ def take(self, indices, allow_fill=False, fill_value=None):
         return RaggedArray(sequence, dtype=self.flat_array.dtype)
 
     def copy(self, deep=False):
-        """
-        See docstring for ExtensionArray.copy
-        """
         data = dict(
             flat_array=self.flat_array,
             start_indices=self.start_indices)
@@ -587,9 +550,6 @@ def copy(self, deep=False):
 
     @classmethod
     def _concat_same_type(cls, to_concat):
-        """
-        See docstring for ExtensionArray._concat_same_type
-        """
         # concat flat_arrays
         flat_array = np.hstack(ra.flat_array for ra in to_concat)
 
@@ -607,23 +567,14 @@ def _concat_same_type(cls, to_concat):
 
     @property
     def dtype(self):
-        """
-        See docstring for ExtensionArray.dtype
-        """
         return self._dtype
 
     @property
     def nbytes(self):
-        """
-        See docstring for ExtensionArray.nbytes
-        """
         return (self._flat_array.nbytes +
                 self._start_indices.nbytes)
 
     def astype(self, dtype, copy=True):
-        """
-        See docstring for ExtensionArray.astype
-        """
         dtype = pandas_dtype(dtype)
         if isinstance(dtype, RaggedDtype):
             if copy: