diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000000..30c1e18f33c --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: numfocus +custom: http://numfocus.org/donate-to-xarray diff --git a/doc/computation.rst b/doc/computation.rst index 663c546be20..240a1e5704b 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -95,6 +95,9 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification of which values to use as the index in the interpolation. +xarray also provides the ``max_gap`` keyword argument to limit the interpolation to +data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` +for more. Aggregation =========== diff --git a/doc/conf.py b/doc/conf.py index 7c1557a1e66..0e04f8ccde8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -340,9 +340,10 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "iris": ("http://scitools.org.uk/iris/docs/latest/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "numba": ("https://numba.pydata.org/numba-doc/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), + "iris": ("https://scitools.org.uk/iris/docs/latest", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "numba": ("https://numba.pydata.org/numba-doc/latest", None), + "matplotlib": ("https://matplotlib.org", None), } diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96f0ba9a4a6..6bf495713fe 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,13 @@ Breaking changes New Features ~~~~~~~~~~~~ + +- Added the ``fill_value`` option to :py:meth:`~xarray.DataArray.unstack` and + :py:meth:`~xarray.Dataset.unstack` (:issue:`3518`). + By `Keisuke Fujii `_. +- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and + :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data + gap that will be filled by interpolation. By `Deepak Cherian `_. - :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible @@ -73,12 +80,22 @@ New Features for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is - invoked. (:issue:`3378`, :pull:`3446`) + invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. +- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ +- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when + calling :py:meth:`Dataset.rename` (also :py:meth:`Dataset.rename_dims` + and :py:meth:`xr.Dataset.rename_vars`). By `Mathias Hauser `_ + (:issue:`3522`). 
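A minimal sketch of the new ``max_gap`` behaviour described above, assuming the synthetic array from the ``interpolate_na`` docstring example::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        [np.nan, np.nan, np.nan, 1.0, np.nan, np.nan, 4.0, np.nan, np.nan],
        dims="x",
        coords={"x": np.arange(9)},
    )
    # the interior gap spans x=3 to x=6, i.e. length 3, so it is filled
    # when max_gap=3 but left untouched when max_gap=2
    da.interpolate_na(dim="x", max_gap=3)  # fills x=4, 5 with 2.0, 3.0
    da.interpolate_na(dim="x", max_gap=2)  # interior gap stays NaN

Boundary NaNs are never filled by linear interpolation, with or without ``max_gap``.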
+- Fix a bug in `set_index` when an existing dimension becomes a level variable of a MultiIndex. (:pull:`3520`) + By `Keisuke Fujii `_. +- Harmonize `_FillValue` and `missing_value` during encoding and decoding steps. (:pull:`3502`) + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). @@ -88,9 +105,14 @@ Bug fixes By `Deepak Cherian `_. - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. +- Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). + In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. + By `Deepak Cherian `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ +- Allow appending datetime and bool data variables to zarr stores. + (:issue:`3480`). By `Akihiro Matsukawa `_. Documentation ~~~~~~~~~~~~~ @@ -111,7 +133,8 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Added integration tests against `pint `_. - (:pull:`3238`, :pull:`3447`, :pull:`3508`) by `Justus Magin `_. + (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`) + by `Justus Magin `_. .. note:: @@ -130,6 +153,9 @@ Internal Changes - Enable type checking on default sentinel values (:pull:`3472`) By `Maximilian Roos `_ +- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`) + By `Maximilian Roos `_ + .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) @@ -217,6 +243,9 @@ Bug fixes By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_. +- Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is + specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created. + (:pull:`3362`). By `Deepak Cherian `_.
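The rolling deprecation in the last bullet above, as a minimal before/after sketch with synthetic data::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(10.0), dims="time")
    # deprecated: the reduction already knows the rolling dimension, so
    # da.rolling(time=3).mean(dim="time") now emits a DeprecationWarning
    da.rolling(time=3).mean()  # preferred spelling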
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d23594fc675..945b3937c43 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1234,6 +1234,8 @@ def _validate_datatypes_for_zarr_append(dataset): def check_dtype(var): if ( not np.issubdtype(var.dtype, np.number) + and not np.issubdtype(var.dtype, np.datetime64) + and not np.issubdtype(var.dtype, np.bool) and not coding.strings.is_unicode_dtype(var.dtype) and not var.dtype == object ): @@ -1241,8 +1243,9 @@ def check_dtype(var): raise ValueError( "Invalid dtype for data variable: {} " "dtype must be a subtype of number, " - "a fixed sized string, a fixed size " - "unicode string or an object".format(var) + "datetime, bool, a fixed sized string, " + "a fixed size unicode string or an " + "object".format(var) ) for k in dataset.data_vars.values(): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5f9c8932b6b..2b5f87ab0cd 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,7 +8,6 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type -from ..core.utils import equivalent from ..core.variable import Variable @@ -152,18 +151,25 @@ def encode(self, variable, name=None): fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - if fv is not None and mv is not None and not equivalent(fv, mv): + if ( + fv is not None + and mv is not None + and not duck_array_ops.allclose_or_equiv(fv, mv) + ): raise ValueError( - "Variable {!r} has multiple fill values {}. " - "Cannot encode data. ".format(name, [fv, mv]) + f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." ) if fv is not None: + # Ensure _FillValue is cast to same dtype as data's + encoding["_FillValue"] = data.dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: + # Ensure missing_value is cast to same dtype as data's + encoding["missing_value"] = data.dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/core/common.py b/xarray/core/common.py index d372115ea57..2afe4b4c3a7 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -43,14 +43,12 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce( - func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, axis, skipna=skipna, **kwargs) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore - return self.reduce(func, dim, axis, allow_lazy=True, **kwargs) + return self.reduce(func, dim, axis, **kwargs) return wrapped_func @@ -83,20 +81,13 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, - dim, - skipna=skipna, - numeric_only=numeric_only, - allow_lazy=True, - **kwargs, + func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore - return self.reduce( - func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) return 
wrapped_func diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f0b5afdf4d5..700ef2a8d16 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -48,7 +48,7 @@ assert_coordinate_consistent, remap_label_indexers, ) -from .dataset import Dataset, merge_indexes, split_indexes +from .dataset import Dataset, split_indexes from .formatting import format_item from .indexes import Indexes, copy_indexes, default_indexes from .merge import PANDAS_TYPES, _extract_indexes_from_coords @@ -249,14 +249,14 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ - _accessors: Optional[Dict[str, Any]] # noqa + _cache: Dict[str, Any] _coords: Dict[Any, Variable] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] _variable: Variable __slots__ = ( - "_accessors", + "_cache", "_coords", "_file_obj", "_indexes", @@ -376,7 +376,6 @@ def __init__( assert isinstance(coords, dict) self._coords = coords self._name = name - self._accessors = None # TODO(shoyer): document this argument, once it becomes part of the # public interface. @@ -772,7 +771,9 @@ def reset_coords( return dataset def __dask_tokenize__(self): - return (type(self), self._variable, self._coords, self._name) + from dask.base import normalize_token + + return normalize_token((type(self), self._variable, self._coords, self._name)) def __dask_graph__(self): return self._to_temp_dataset().__dask_graph__() @@ -1617,10 +1618,10 @@ def set_index( -------- DataArray.reset_index """ - _check_inplace(inplace) - indexes = either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") - coords, _ = merge_indexes(indexes, self._coords, set(), append=append) - return self._replace(coords=coords) + ds = self._to_temp_dataset().set_index( + indexes, append=append, inplace=inplace, **indexes_kwargs + ) + return self._from_temp_dataset(ds) def reset_index( self, @@ -1743,7 +1744,9 @@ def stack( return self._from_temp_dataset(ds) def unstack( - self, dim: Union[Hashable, Sequence[Hashable], None] = None + self, + dim: Union[Hashable, Sequence[Hashable], None] = None, + fill_value: Any = dtypes.NA, ) -> "DataArray": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -1756,6 +1759,7 @@ def unstack( dim : hashable or sequence of hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -1787,7 +1791,7 @@ def unstack( -------- DataArray.stack """ - ds = self._to_temp_dataset().unstack(dim) + ds = self._to_temp_dataset().unstack(dim, fill_value) return self._from_temp_dataset(ds) def to_unstacked_dataset(self, dim, level=0): @@ -2034,44 +2038,69 @@ def fillna(self, value: Any) -> "DataArray": def interpolate_na( self, - dim=None, + dim: Hashable = None, method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "DataArray": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline', 'akima'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). 
Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline', and `akima`: use their - respective``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - DataArray + interpolated: DataArray + Filled in DataArray. See also -------- @@ -2086,6 +2115,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f46fe9604ff..e5f9fac9036 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -210,6 +210,7 @@ def merge_indexes( """ vars_to_replace: Dict[Hashable, Variable] = {} vars_to_remove: List[Hashable] = [] + dims_to_replace: Dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable."
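# dims_to_replace (declared above) addresses GH3512: when an existing
# dimension becomes a level of a newly created MultiIndex, e.g. after
# xr.concat([da, db], dim="x").set_index(xy=["x", "y"]), any variable still
# dimensioned on the level name "x" must have that dimension renamed to the
# stacked dimension "xy"; the mapping is filled in and applied further down.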
for dim, var_names in indexes.items(): @@ -250,7 +251,7 @@ def merge_indexes( if not len(names) and len(var_names) == 1: idx = pd.Index(variables[var_names[0]].values) - else: + else: # MultiIndex for n in var_names: try: var = variables[n] @@ -262,15 +263,22 @@ def merge_indexes( levels.append(cat.categories) idx = pd.MultiIndex(levels, codes, names=names) + for n in names: + dims_to_replace[n] = dim vars_to_replace[dim] = IndexVariable(dim, idx) vars_to_remove.extend(var_names) new_variables = {k: v for k, v in variables.items() if k not in vars_to_remove} new_variables.update(vars_to_replace) + + # update dimensions if necessary GH: 3512 + for k, v in new_variables.items(): + if any(d in dims_to_replace for d in v.dims): + new_dims = [dims_to_replace.get(d, d) for d in v.dims] + new_variables[k] = v._replace(dims=new_dims) new_coord_names = coord_names | set(vars_to_replace) new_coord_names -= set(vars_to_remove) - return new_variables, new_coord_names @@ -417,8 +425,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): coordinates used for label based indexing. """ - _accessors: Optional[Dict[str, Any]] _attrs: Optional[Dict[Hashable, Any]] + _cache: Dict[str, Any] _coord_names: Set[Hashable] _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] @@ -426,8 +434,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _variables: Dict[Hashable, Variable] __slots__ = ( - "_accessors", "_attrs", + "_cache", "_coord_names", "_dims", "_encoding", @@ -533,7 +541,6 @@ def __init__( data_vars, coords, compat=compat ) - self._accessors = None self._attrs = dict(attrs) if attrs is not None else None self._file_obj = None self._encoding = None @@ -658,7 +665,11 @@ def load(self, **kwargs) -> "Dataset": return self def __dask_tokenize__(self): - return (type(self), self._variables, self._coord_names, self._attrs) + from dask.base import normalize_token + + return normalize_token( + (type(self), self._variables, self._coord_names, self._attrs) + ) def __dask_graph__(self): graphs = {k: v.__dask_graph__() for k, v in self.variables.items()} @@ -864,7 +875,6 @@ def _construct_direct( obj._attrs = attrs obj._file_obj = file_obj obj._encoding = encoding - obj._accessors = None return obj @classmethod @@ -2665,7 +2675,7 @@ def _rename_indexes(self, name_dict, dims_set): verify_integrity=False, ) else: - index = pd.Index(v, name=new_name) + index = v.rename(new_name) indexes[new_name] = index return indexes @@ -3333,7 +3343,7 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": index = self.get_index(dim) index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3342,7 +3352,7 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": if index.equals(full_idx): obj = self else: - obj = self.reindex({dim: full_idx}, copy=False) + obj = self.reindex({dim: full_idx}, copy=False, fill_value=fill_value) new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] @@ -3368,7 +3378,11 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": variables, coord_names=coord_names, indexes=indexes ) - def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": + def unstack( + self, + dim: Union[Hashable, Iterable[Hashable]] = None, + fill_value: Any = dtypes.NA, + ) -> "Dataset": """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. 
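A brief usage sketch for the new ``fill_value`` argument of ``unstack``, assuming synthetic data that mirrors ``test_unstack_fill_value`` later in this diff::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"var": ("x", np.arange(6))},
        coords={"x": [0, 1, 2] * 2, "y": ("x", ["a"] * 3 + ["b"] * 3)},
    )
    # drop rows so the (x, y) MultiIndex is incomplete, then unstack;
    # the resulting holes are filled with -1 instead of the default NaN
    incomplete = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"])
    incomplete.unstack("index", fill_value=-1)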
@@ -3380,6 +3394,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": dim : Hashable or iterable of Hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -3417,7 +3432,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim) + result = result._unstack_once(dim, fill_value) return result def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset": @@ -3906,42 +3921,65 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "Dataset": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- - dim : Hashable + dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline': use their respective - ``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. - kwargs : any - parameters passed verbatim to the underlying interplation function + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float.
Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - Dataset + interpolated: Dataset + Filled in Dataset. See also -------- @@ -3957,6 +3995,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) return new @@ -4037,7 +4076,7 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, - allow_lazy: bool = False, + allow_lazy: bool = None, **kwargs: Any, ) -> "Dataset": """Reduce this dataset by applying `func` along some dimension(s). diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index f473eaa497d..79abbccea39 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -20,10 +20,15 @@ def __get__(self, obj, cls): # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor + # Use the same dict as @pandas.util.cache_readonly. + # It must be explicitly declared in obj.__slots__. try: - return obj._accessors[self._name] - except TypeError: - obj._accessors = {} + cache = obj._cache + except AttributeError: + cache = obj._cache = {} + + try: + return cache[self._name] except KeyError: pass @@ -35,7 +40,7 @@ def __get__(self, obj, cls): # something else (GH933): raise RuntimeError("error initializing %r accessor." % self._name) - obj._accessors[self._name] = accessor_obj + cache[self._name] = accessor_obj return accessor_obj diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 37ab29dabd2..5606b5d754d 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,6 +558,59 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + """Compute the qth quantile over each array in the groups and + concatenate them together into a new array. + + Parameters + ---------- + q : float in range of [0,1] (or sequence of floats) + Quantile to compute, which must be between 0 and 1 + inclusive. + dim : `...`, str or sequence of str, optional + Dimension(s) over which to apply quantile. + Defaults to the grouped dimension. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + + Returns + ------- + quantiles : Variable + If `q` is a single quantile, then the result is a + scalar. If multiple percentiles are given, first axis of + the result corresponds to the quantile. In either case a + quantile dimension is added to the return array. 
The other + dimensions are the dimensions that remain after the + reduction of the array. + + See Also + -------- + numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, + DataArray.quantile + """ + if dim is None: + dim = self._group_dim + + out = self.map( + self._obj.__class__.quantile, + shortcut=False, + q=q, + dim=dim, + interpolation=interpolation, + keep_attrs=keep_attrs, + ) + + return out + def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. @@ -586,9 +639,7 @@ def _first_or_last(self, op, skipna, keep_attrs): return self._obj if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) - return self.reduce( - op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True - ) + return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs) def first(self, skipna=None, keep_attrs=None): """Return the first element of each group along the group dimension @@ -741,60 +792,6 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): combined = self._maybe_unstack(combined) return combined - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): - """Compute the qth quantile over each array in the groups and - concatenate them together into a new array. - - Parameters - ---------- - q : float in range of [0,1] (or sequence of floats) - Quantile to compute, which must be between 0 and 1 - inclusive. - dim : `...`, str or sequence of str, optional - Dimension(s) over which to apply quantile. - Defaults to the grouped dimension. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. - - Returns - ------- - quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. - - See Also - -------- - numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, - DataArray.quantile - """ - if dim is None: - dim = self._group_dim - - out = self.map( - self._obj.__class__.quantile, - shortcut=False, - q=q, - dim=dim, - interpolation=interpolation, - keep_attrs=keep_attrs, - ) - - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") - return out - def reduce( self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs ): diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 77dde66484e..117fcaf8f81 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,18 +1,46 @@ import warnings from functools import partial -from typing import Any, Callable, Dict, Sequence +from numbers import Number +from typing import Any, Callable, Dict, Hashable, Sequence, Union import numpy as np import pandas as pd from . 
import utils -from .common import _contains_datetime_like_objects +from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import dask_array_type from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables +def _get_nan_block_lengths(obj, dim: Hashable, index: Variable): + """ + Return an object where each NaN element in 'obj' is replaced by the + length of the gap the element is in. + """ + + # make variable so that we get broadcasting for free + index = Variable([dim], index) + + # algorithm from https://github.com/pydata/xarray/pull/3302#discussion_r324707072 + arange = ones_like(obj) * index + valid = obj.notnull() + valid_arange = arange.where(valid) + cumulative_nans = valid_arange.ffill(dim=dim).fillna(index[0]) + + nan_block_lengths = ( + cumulative_nans.diff(dim=dim, label="upper") + .reindex({dim: obj[dim]}) + .where(valid) + .bfill(dim=dim) + .where(~valid, 0) + .fillna(index[-1] - valid_arange.max()) + ) + + return nan_block_lengths + + class BaseInterpolator: """Generic interpolator class for normalizing interpolation methods """ @@ -178,7 +206,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, use_coordinate=True): +def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): """get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -195,23 +223,33 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index = arr.coords[use_coordinate] if index.ndim != 1: raise ValueError( - "Coordinates used for interpolation must be 1D, " - "%s is %dD." % (use_coordinate, index.ndim) + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." ) + index = index.to_index() + + # TODO: index.name is None for multiindexes + # set name for nice error messages below + if isinstance(index, pd.MultiIndex): + index.name = dim + + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") + + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") # raise if index cannot be cast to a float (e.g. MultiIndex) try: index = index.values.astype(np.float64) except (TypeError, ValueError): # pandas raises a TypeError - # xarray/nuppy raise a ValueError + # xarray/numpy raise a ValueError raise TypeError( - "Index must be castable to float64 to support" - "interpolation, got: %s" % type(index) + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." ) - # check index sorting now so we can skip it later - if not (np.diff(index) > 0).all(): - raise ValueError("Index must be monotonicly increasing") + else: axis = arr.get_axis_num(dim) index = np.arange(arr.shape[axis], dtype=np.float64) @@ -220,7 +258,13 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): def interp_na( - self, dim=None, use_coordinate=True, method="linear", limit=None, **kwargs + self, + dim: Hashable = None, + use_coordinate: Union[bool, str] = True, + method: str = "linear", + limit: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + **kwargs, ): """Interpolate values according to different methods. 
""" @@ -230,6 +274,40 @@ def interp_na( if limit is not None: valids = _get_valid_fill_mask(self, dim, limit) + if max_gap is not None: + max_type = type(max_gap).__name__ + if not is_scalar(max_gap): + raise ValueError("max_gap must be a scalar.") + + if ( + dim in self.indexes + and isinstance(self.indexes[dim], pd.DatetimeIndex) + and use_coordinate + ): + if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): + raise TypeError( + f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" + ) + + if isinstance(max_gap, str): + try: + max_gap = pd.to_timedelta(max_gap) + except ValueError: + raise ValueError( + f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" + ) + + if isinstance(max_gap, pd.Timedelta): + max_gap = np.timedelta64(max_gap.value, "ns") + + max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + + if not use_coordinate: + if not isinstance(max_gap, (Number, np.number)): + raise TypeError( + f"Expected integer or floating point max_gap since use_coordinate=False. Received {max_type}." + ) + # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) @@ -253,6 +331,14 @@ def interp_na( if limit is not None: arr = arr.where(valids) + if max_gap is not None: + if dim not in self.coords: + raise NotImplementedError( + "max_gap not implemented for unlabeled coordinates yet." + ) + nan_block_lengths = _get_nan_block_lengths(self, dim, index) + arr = arr.where(nan_block_lengths <= max_gap) + return arr diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index f4e571a8efe..a1864332f4d 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,4 +1,5 @@ import functools +import warnings from typing import Callable import numpy as np @@ -351,6 +352,14 @@ def _bottleneck_reduce(self, func, **kwargs): def _numpy_or_bottleneck_reduce( self, array_agg_func, bottleneck_move_func, **kwargs ): + if "dim" in kwargs: + warnings.warn( + f"Reductions will be applied along the rolling dimension '{self.dim}'. 
Passing the 'dim' kwarg to reduction operations has no effect and will raise an error in xarray 0.16.0.", + DeprecationWarning, + stacklevel=3, + ) + del kwargs["dim"] + if bottleneck_move_func is not None and not isinstance( self.obj.data, dask_array_type ): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 916df75b3e0..e630dc4b457 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,5 +1,7 @@ +import copy import functools import itertools +import warnings from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion @@ -23,10 +25,11 @@ from .pycompat import dask_array_type, integer_types from .utils import ( OrderedSet, + _default, decode_numpy_dict_values, either_dict_or_kwargs, - infix_dims, ensure_us_time_resolution, + infix_dims, ) try: @@ -393,7 +396,9 @@ def compute(self, **kwargs): def __dask_tokenize__(self): # Use v.data, instead of v._data, in order to cope with the wrappers # around NetCDF and the like - return type(self), self._dims, self.data, self._attrs + from dask.base import normalize_token + + return normalize_token((type(self), self._dims, self.data, self._attrs)) def __dask_graph__(self): if isinstance(self._data, dask_array_type): @@ -884,7 +889,20 @@ def copy(self, deep=True, data=None): # note: # dims is already an immutable tuple # attributes and encoding will be copied when the new Array is created - return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True) + return self._replace(data=data) + + def _replace( + self, dims=_default, data=_default, attrs=_default, encoding=_default + ) -> "Variable": + if dims is _default: + dims = copy.copy(self._dims) + if data is _default: + data = copy.copy(self.data) + if attrs is _default: + attrs = copy.copy(self._attrs) + if encoding is _default: + encoding = copy.copy(self._encoding) + return type(self)(dims, data, attrs, encoding, fastpath=True) def __copy__(self): return self.copy(deep=False) @@ -1425,7 +1443,7 @@ def reduce( axis=None, keep_attrs=None, keepdims=False, - allow_lazy=False, + allow_lazy=None, **kwargs, ): """Reduce this array by applying `func` along some dimension(s). @@ -1466,7 +1484,17 @@ def reduce( if dim is not None: axis = self.get_axis_num(dim) + + if allow_lazy is not None: + warnings.warn( + "allow_lazy is deprecated and will be removed in version 0.16.0. 
It is now True by default.", + DeprecationWarning, + ) + else: + allow_lazy = True + input_data = self.data if allow_lazy else self.values + if axis is not None: data = func(input_data, axis=axis, **kwargs) else: @@ -1973,8 +2001,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): self._data = PandasIndexAdapter(self._data) def __dask_tokenize__(self): + from dask.base import normalize_token + # Don't waste time converting pd.Index to np.ndarray - return (type(self), self._dims, self._data.array, self._attrs) + return normalize_token((type(self), self._dims, self._data.array, self._attrs)) def load(self): # data is already loaded into memory for IndexVariable diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6cd584daa96..3e0474e7b60 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_encode_missing_fill_values_conflict(): + original = xr.Variable( + ("x",), + [0.0, -1.0, 1.0], + encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)}, + ) + coder = variables.CFMaskCoder() + encoded = coder.encode(original) + + assert encoded.dtype == encoded.attrs["missing_value"].dtype + assert encoded.dtype == encoded.attrs["_FillValue"].dtype + + with pytest.warns(variables.SerializationWarning): + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) + + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]), diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index fa8ae9991d7..4c1f317342f 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -12,6 +12,7 @@ import xarray as xr import xarray.ufuncs as xu from xarray import DataArray, Dataset, Variable +from xarray.core import duck_array_ops from xarray.testing import assert_chunks_equal from xarray.tests import mock @@ -217,6 +218,8 @@ def test_reduce(self): self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) with raises_regex(NotImplementedError, "dask"): v.median() + with raise_if_dask_computes(): + v.reduce(duck_array_ops.mean) def test_missing_values(self): values = np.array([0, 1, np.nan, 3]) @@ -488,7 +491,17 @@ def test_groupby(self): v = self.lazy_array expected = u.groupby("x").mean(...) - actual = v.groupby("x").mean(...) + with raise_if_dask_computes(): + actual = v.groupby("x").mean(...) 
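# raise_if_dask_computes, a helper used throughout this test module, raises
# if anything inside the context triggers a dask compute, so the assertion
# that follows also pins down the new lazy-by-default groupby behaviour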
+ self.assertLazyAndAllClose(expected, actual) + + def test_rolling(self): + u = self.eager_array + v = self.lazy_array + + expected = u.rolling(x=2).mean() + with raise_if_dask_computes(): + actual = v.rolling(x=2).mean() self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): @@ -500,7 +513,8 @@ def test_groupby_first(self): with raises_regex(NotImplementedError, "dask"): v.groupby("ab").first() expected = u.groupby("ab").first() - actual = v.groupby("ab").first(skipna=False) + with raise_if_dask_computes(): + actual = v.groupby("ab").first(skipna=False) self.assertLazyAndAllClose(expected, actual) def test_reindex(self): @@ -1283,6 +1297,32 @@ def test_token_identical(obj, transform): ) +def test_recursive_token(): + """Test that tokenization is invoked recursively, and doesn't just rely on the + output of str() + """ + a = np.ones(10000) + b = np.ones(10000) + b[5000] = 2 + assert str(a) == str(b) + assert dask.base.tokenize(a) != dask.base.tokenize(b) + + # Test DataArray and Variable + da_a = DataArray(a) + da_b = DataArray(b) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + # Test Dataset + ds_a = da_a.to_dataset(name="x") + ds_b = da_b.to_dataset(name="x") + assert dask.base.tokenize(ds_a) != dask.base.tokenize(ds_b) + + # Test IndexVariable + da_a = DataArray(a, dims=["x"], coords={"x": a}) + da_b = DataArray(a, dims=["x"], coords={"x": b}) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + @requires_scipy_or_netCDF4 def test_normalize_token_with_backend(map_ds): with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b0cb9d672d8..35d556c9ef4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1183,6 +1183,16 @@ def test_selection_multiindex_remove_unused(self): expected = expected.set_index(xy=["x", "y"]).unstack() assert_identical(expected, actual) + def test_selection_multiindex_from_level(self): + # GH: 3512 + da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) + db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) + data = xr.concat([da, db], dim="x").set_index(xy=["x", "y"]) + assert data.dims == ("xy",) + actual = data.sel(y="a") + expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y").drop("y") + assert_equal(actual, expected) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") @@ -4190,6 +4200,9 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): ) assert_array_equal(actual.values, expected) + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") + # Test center rolling_obj = da.rolling(time=7, center=center) actual = getattr(rolling_obj, name)()["time"] @@ -4205,6 +4218,9 @@ def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window): # dask version rolling_obj = da_dask.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() + if name != "count": + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") # numpy version rolling_obj = da_dask.load().rolling( time=window, min_periods=min_periods, center=center diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d001c43da94..be40ce7c6e8 100644 --- a/xarray/tests/test_dataset.py +++ 
b/xarray/tests/test_dataset.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +from pandas.core.indexes.datetimes import DatetimeIndex import xarray as xr from xarray import ( @@ -22,6 +23,7 @@ open_dataset, set_options, ) +from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like from xarray.core.npcompat import IS_NEP18_ACTIVE @@ -90,6 +92,14 @@ def create_append_test_data(seed=None): string_var = np.array(["ae", "bc", "df"], dtype=object) string_var_to_append = np.array(["asdf", "asdfg"], dtype=object) unicode_var = ["áó", "áó", "áó"] + datetime_var = np.array( + ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[s]" + ) + datetime_var_to_append = np.array( + ["2019-01-04", "2019-01-05"], dtype="datetime64[s]" + ) + bool_var = np.array([True, False, True], dtype=np.bool) + bool_var_to_append = np.array([False, True], dtype=np.bool) ds = xr.Dataset( data_vars={ @@ -102,6 +112,8 @@ def create_append_test_data(seed=None): "unicode_var": xr.DataArray( unicode_var, coords=[time1], dims=["time"] ).astype(np.unicode_), + "datetime_var": xr.DataArray(datetime_var, coords=[time1], dims=["time"]), + "bool_var": xr.DataArray(bool_var, coords=[time1], dims=["time"]), } ) @@ -118,6 +130,10 @@ def create_append_test_data(seed=None): "unicode_var": xr.DataArray( unicode_var[:nt2], coords=[time2], dims=["time"] ).astype(np.unicode_), + "datetime_var": xr.DataArray( + datetime_var_to_append, coords=[time2], dims=["time"] + ), + "bool_var": xr.DataArray(bool_var_to_append, coords=[time2], dims=["time"]), } ) @@ -2444,6 +2460,53 @@ def test_rename_vars(self): with pytest.raises(ValueError): original.rename_vars(names_dict_bad) + @requires_cftime + def test_rename_does_not_change_CFTimeIndex_type(self): + # make sure CFTimeIndex is not converted to DatetimeIndex #3522 + + time = xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], CFTimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], CFTimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + def test_rename_does_not_change_DatetimeIndex_type(self): + # make sure DatetimeIndex is conserved on rename + + time = pd.date_range(start="2000", periods=6, freq="2MS") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], DatetimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], DatetimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + assert isinstance(renamed.indexes["time"], DatetimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], DatetimeIndex) + def test_swap_dims(self): original = Dataset({"x": [1, 2, 3], "y": ("x",
list("abc")), "z": 42}) expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")}) @@ -2731,6 +2794,23 @@ def test_unstack_errors(self): with raises_regex(ValueError, "do not have a MultiIndex"): ds.unstack("x") + def test_unstack_fill_value(self): + ds = xr.Dataset( + {"var": (("x",), np.arange(6))}, + coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, + ) + # make ds incomplete + ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) + # test fill_value + actual = ds.unstack("index", fill_value=-1) + expected = ds.unstack("index").fillna(-1).astype(np.int) + assert actual["var"].dtype == np.int + assert_equal(actual, expected) + + actual = ds["var"].unstack("index", fill_value=-1) + expected = ds["var"].unstack("index").fillna(-1).astype(np.int) + assert actual.equals(expected) + def test_stack_unstack_fast(self): ds = Dataset( { diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 581affa3471..97bd31ae050 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -137,42 +137,58 @@ def test_da_groupby_empty(): def test_da_groupby_quantile(): - array = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]) + array = xr.DataArray( + data=[1, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) # Scalar quantile - expected = xr.DataArray([2, 5], [("x", [1, 2])]) + expected = xr.DataArray( + data=[2, 5], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) actual = array.groupby("x").quantile(0.5) assert_identical(expected, actual) # Vector quantile - expected = xr.DataArray([[1, 3], [4, 6]], [("x", [1, 2]), ("quantile", [0, 1])]) + expected = xr.DataArray( + data=[[1, 3], [4, 6]], + coords={"x": [1, 2], "quantile": [0, 1]}, + dims=("x", "quantile"), + ) actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) # Multiple dimensions array = xr.DataArray( - [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], + data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + dims=("x", "y"), ) actual_x = array.groupby("x").quantile(0, dim=...) - expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) + expected_x = xr.DataArray( + data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" + ) assert_identical(expected_x, actual_x) actual_y = array.groupby("y").quantile(0, dim=...) - expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) + expected_y = xr.DataArray( + data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" + ) assert_identical(expected_y, actual_y) actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( - [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] + data=[[1, 11, 22], [4, 15, 24]], + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_xx, actual_xx) actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( - [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], + data=[[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_yy, actual_yy) @@ -180,14 +196,14 @@ def test_da_groupby_quantile(): x = [0, 1] foo = xr.DataArray( np.reshape(np.arange(365 * 2), (365, 2)), - coords=dict(time=times, x=x), + coords={"time": times, "x": x}, dims=("time", "x"), ) g = foo.groupby(foo.time.dt.month) actual = g.quantile(0, dim=...) 
expected = xr.DataArray( - [ + data=[ 0.0, 62.0, 120.0, @@ -201,12 +217,111 @@ def test_da_groupby_quantile(): 610.0, 670.0, ], - [("month", np.arange(1, 13))], + coords={"month": np.arange(1, 13), "quantile": 0}, + dims="month", ) assert_identical(expected, actual) actual = g.quantile(0, dim="time")[:2] - expected = xr.DataArray([[0.0, 1], [62.0, 63]], [("month", [1, 2]), ("x", [0, 1])]) + expected = xr.DataArray( + data=[[0.0, 1], [62.0, 63]], + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + dims=("month", "x"), + ) + assert_identical(expected, actual) + + +def test_ds_groupby_quantile(): + ds = xr.Dataset( + data_vars={"a": ("x", [1, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]} + ) + + # Scalar quantile + expected = xr.Dataset( + data_vars={"a": ("x", [2, 5])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5) + assert_identical(expected, actual) + + # Vector quantile + expected = xr.Dataset( + data_vars={"a": (("x", "quantile"), [[1, 3], [4, 6]])}, + coords={"x": [1, 2], "quantile": [0, 1]}, + ) + actual = ds.groupby("x").quantile([0, 1]) + assert_identical(expected, actual) + + # Multiple dimensions + ds = xr.Dataset( + data_vars={ + "a": ( + ("x", "y"), + [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + ) + }, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + ) + + actual_x = ds.groupby("x").quantile(0, dim=...) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) + assert_identical(expected_x, actual_x) + + actual_y = ds.groupby("y").quantile(0, dim=...) + expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) + assert_identical(expected_y, actual_y) + + actual_xx = ds.groupby("x").quantile(0) + expected_xx = xr.Dataset( + {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + ) + assert_identical(expected_xx, actual_xx) + + actual_yy = ds.groupby("y").quantile(0) + expected_yy = xr.Dataset( + {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + ).transpose() + assert_identical(expected_yy, actual_yy) + + times = pd.date_range("2000-01-01", periods=365) + x = [0, 1] + foo = xr.Dataset( + {"a": (("time", "x"), np.reshape(np.arange(365 * 2), (365, 2)))}, + coords=dict(time=times, x=x), + ) + g = foo.groupby(foo.time.dt.month) + + actual = g.quantile(0, dim=...) 
+ expected = xr.Dataset( + { + "a": ( + "month", + [ + 0.0, + 62.0, + 120.0, + 182.0, + 242.0, + 304.0, + 364.0, + 426.0, + 488.0, + 548.0, + 610.0, + 670.0, + ], + ) + }, + coords={"month": np.arange(1, 13), "quantile": 0}, + ) + assert_identical(expected, actual) + + actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) + expected = xr.Dataset( + data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + ) assert_identical(expected, actual) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index cfce5d6f645..0b410383a34 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -5,7 +5,13 @@ import pytest import xarray as xr -from xarray.core.missing import NumpyInterpolator, ScipyInterpolator, SplineInterpolator +from xarray.core.missing import ( + NumpyInterpolator, + ScipyInterpolator, + SplineInterpolator, + get_clean_interp_index, + _get_nan_block_lengths, +) from xarray.core.pycompat import dask_array_type from xarray.tests import ( assert_array_equal, @@ -153,7 +159,7 @@ def test_interpolate_pd_compat_polynomial(): def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) expected = xr.DataArray(vals, dims="x", coords={"x": [2, 1, 3]}) - with raises_regex(ValueError, "Index must be monotonicly increasing"): + with raises_regex(ValueError, "Index 'x' must be monotonically increasing"): expected.interpolate_na(dim="x", method="index") @@ -169,12 +175,19 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim="x", method="foo") +def test_interpolate_duplicate_values_raises(): + data = np.random.randn(2, 3) + da = xr.DataArray(data, coords=[("x", ["a", "a"]), ("y", [0, 1, 2])]) + with raises_regex(ValueError, "Index 'x' has duplicate values"): + da.interpolate_na(dim="x", method="foo") + + def test_interpolate_multiindex_raises(): data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, coords=[("x", ["a", "b"]), ("y", [0, 1, 2])]) das = da.stack(z=("x", "y")) - with raises_regex(TypeError, "Index must be castable to float64"): + with raises_regex(TypeError, "Index 'z' must be castable to float64"): das.interpolate_na(dim="z") @@ -439,3 +452,114 @@ def test_ffill_dataset(ds): @requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim="time") + + +@requires_bottleneck +@pytest.mark.parametrize( + "y, lengths", + [ + [np.arange(9), [[3, 3, 3, 0, 3, 3, 0, 2, 2]]], + [np.arange(9) * 3, [[9, 9, 9, 0, 9, 9, 0, 6, 6]]], + [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 4, 4, 0, 4, 4]]], + ], +) +def test_interpolate_na_nan_block_lengths(y, lengths): + arr = [[np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan]] + da = xr.DataArray(arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_nan_block_lengths(da, dim="y", index=index) + expected = da.copy(data=lengths * 2) + assert_equal(actual, expected) + + +@pytest.fixture +def da_time(): + return xr.DataArray( + [np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], + dims=["t"], + ) + + +def test_interpolate_na_max_gap_errors(da_time): + with raises_regex( + NotImplementedError, "max_gap not implemented for unlabeled coordinates" + ): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(ValueError, "max_gap must be a scalar."): + da_time.interpolate_na("t", max_gap=(1,)) + + da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) + with 
raises_regex(TypeError, "Underlying index is"): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(TypeError, "Expected integer or floating point"): + da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) + + with raises_regex(ValueError, "Could not convert 'huh' to timedelta64"): + da_time.interpolate_na("t", max_gap="huh") + + +@requires_bottleneck +@pytest.mark.parametrize( + "time_range_func", + [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], +) +@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) +@pytest.mark.parametrize( + "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] +) +def test_interpolate_na_max_gap_time_specifier( + da_time, max_gap, transform, time_range_func +): + da_time["t"] = time_range_func("2001-01-01", freq="H", periods=11) + expected = transform( + da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) + ) + actual = transform(da_time).interpolate_na("t", max_gap=max_gap) + assert_equal(actual, expected) + + +@requires_bottleneck +@pytest.mark.parametrize( + "coords", + [ + pytest.param(None, marks=pytest.mark.xfail()), + {"x": np.arange(4), "y": np.arange(11)}, + ], +) +def test_interpolate_na_2d(coords): + da = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + + actual = da.interpolate_na("y", max_gap=2) + expected_y = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + ] + ) + assert_equal(actual, expected_y) + + actual = da.interpolate_na("x", max_gap=3) + expected_x = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + assert_equal(actual, expected_x) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index a31da162487..a02fef2faeb 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -856,6 +856,10 @@ def test_dask_token(): import dask s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) + + # https://github.com/pydata/sparse/issues/300 + s.__dask_tokenize__ = lambda: dask.base.normalize_token(s.__dict__) + a = DataArray(s) t1 = dask.base.tokenize(a) t2 = dask.base.tokenize(a) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index fd9e9b039ac..0be6f8af464 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -222,7 +222,9 @@ def convert_units(obj, to): if name != obj.name } - new_obj = xr.DataArray(name=name, data=data, coords=coords, attrs=obj.attrs) + new_obj = xr.DataArray( + name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims + ) elif isinstance(obj, unit_registry.Quantity): units = to.get(None) new_obj = obj.to(units) if units is not None else obj @@ -307,19 +309,689 @@ def __repr__(self): class function: - def __init__(self, name): - self.name = name - self.func = getattr(np, name) + def __init__(self, name_or_function, *args, **kwargs): + if 
callable(name_or_function): + self.name = name_or_function.__name__ + self.func = name_or_function + else: + self.name = name_or_function + self.func = getattr(np, name_or_function) + if self.func is None: + raise AttributeError( + f"module 'numpy' has no attribute named '{self.name}'" + ) + + self.args = args + self.kwargs = kwargs def __call__(self, *args, **kwargs): - return self.func(*args, **kwargs) + all_args = list(self.args) + list(args) + all_kwargs = {**self.kwargs, **kwargs} + + return self.func(*all_args, **all_kwargs) def __repr__(self): return f"function_{self.name}" +def test_apply_ufunc_dataarray(dtype): + func = function( + xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} + ) + + array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(20) * unit_registry.s + data_array = xr.DataArray(data=array, dims="x", coords={"x": x}) + + expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) + result = func(data_array) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataarray(fill_value, variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + data_array1 = xr.DataArray( + data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") + ) + data_array2 = xr.DataArray( + data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(data_array1, data_array2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(data_array1) + # FIXME: should the expected_b have the same units as data_array1 + # or data_array2? 
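On the FIXME above: the helpers in this module settle the question by convention, converting the second operand to the first operand's units before stripping, so both expected results end up carrying ``data_array1``'s units. The convert/strip/attach round-trip in miniature, using pint directly rather than the module's helpers (``ureg`` here is a stand-in for the ``unit_registry`` fixture used elsewhere in this file):

    import pint

    ureg = pint.UnitRegistry()

    q = (1000 * ureg.mm).to(ureg.m)  # convert: <Quantity(1.0, 'meter')>
    bare = q.magnitude               # strip: 1.0
    restored = bare * ureg.m         # attach: <Quantity(1.0, 'meter')>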
+ expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(data_array1), + strip_units(convert_units(data_array2, units)), + **stripped_kwargs, + ) + ) + result_a, result_b = func(data_array1, data_array2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataset(fill_value, unit, variant, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + ds1 = xr.Dataset( + data_vars={"a": (("x", "y"), array1)}, + coords={"x": x, "x_a": ("x", x_a1), "y": y1}, + ) + ds2 = xr.Dataset( + data_vars={"a": (("x", "y"), array2)}, + coords={"x": x, "x_a": ("x", x_a2), "y": y2}, + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(ds1, ds2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(ds1) + # FIXME: should the expected_b have the same units as ds1 or ds2? 
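For reference, the base behavior under test here, stripped of units: with ``join="outer"``, ``xr.align`` unions the indexes of its arguments and writes ``fill_value`` into the positions an argument does not cover. A minimal unitless sketch:

    import xarray as xr

    a = xr.DataArray([1.0, 2.0], coords={"x": [0, 1]}, dims="x")
    b = xr.DataArray([3.0, 4.0], coords={"x": [1, 2]}, dims="x")

    # the union index is [0, 1, 2]; uncovered slots take fill_value
    aligned_a, aligned_b = xr.align(a, b, join="outer", fill_value=-1.0)
    # aligned_a.values -> [1., 2., -1.]; aligned_b.values -> [-1., 3., 4.]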
+ expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(ds1), strip_units(convert_units(ds2, units)), **stripped_kwargs + ) + ) + result_a, result_b = func(ds1, ds2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataarray(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + a = xr.DataArray(data=array1, dims="x") + b = xr.DataArray(data=array2, dims="y") + + expected_a, expected_b = tuple( + attach_units(elem, extract_units(a)) + for elem in xr.broadcast(strip_units(a), strip_units(b)) + ) + result_a, result_b = xr.broadcast(a, b) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataset(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + + (expected,) = tuple( + attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) + ) + (result,) = xr.broadcast(ds) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`combine_by_coords` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_by_coords(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + other_array1 = np.ones_like(array1) * data_unit + other_array2 = np.ones_like(array2) * data_unit + other_x = np.arange(1, 4) * 10 * dim_unit + other_y = np.arange(2, 4) * dim_unit + other_z = np.arange(3, 6) * coord_unit + + ds = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + other = xr.Dataset( + data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)}, + coords={"x": other_x, "y": other_y, "z": ("x", other_z)}, + ) + + if error is not None: + with pytest.raises(error): + xr.combine_by_coords([ds, other]) + + return + + units = extract_units(ds) + expected = attach_units( + xr.combine_by_coords( + [strip_units(ds), strip_units(convert_units(other, units))] + ), + units, + ) + result = xr.combine_by_coords([ds, other]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + 
pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_nested(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + ds4 = xr.Dataset( + data_vars={ + "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit), + "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(6, 9) * dim_unit, + "y": np.arange(6, 8) * dim_unit, + "z": ("x", np.arange(6, 9) * coord_unit), + }, + ) + + func = function(xr.combine_nested, concat_dim=["x", "y"]) + if error is not None: + with pytest.raises(error): + func([[ds1, ds2], [ds3, ds4]]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func( + [ + [strip_units(ds1), convert_and_strip(ds2)], + [convert_and_strip(ds3), convert_and_strip(ds4)], + ] + ), + units, + ) + result = func([[ds1, ds2], [ds3, ds4]]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + arr1 = xr.DataArray(data=array1, coords={"x": x1}, dims="x") + arr2 = 
xr.DataArray(data=array2, coords={"x": x2}, dims="x") + + if error is not None: + with pytest.raises(error): + xr.concat([arr1, arr2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(arr1), strip_units(arr2)], dim="x"), extract_units(arr1) + ) + result = xr.concat([arr1, arr2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1}) + ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2}) + + if error is not None: + with pytest.raises(error): + xr.concat([ds1, ds2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(ds1), strip_units(ds2)], dim="x"), extract_units(ds1) + ) + result = xr.concat([ds1, ds2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + y = np.arange(3) * original_unit + z = np.arange(4) * original_unit + u = np.linspace(10, 20, 2) * original_unit + v = np.linspace(10, 20, 3) * original_unit + w = np.linspace(10, 20, 4) * original_unit + + arr1 = xr.DataArray( + name="a", + data=array1, + coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + dims=("x", "y"), + ) + arr2 = xr.DataArray( + name="b", + data=array2, + coords={ + "x": np.arange(2, 4) * dim_unit, + "z": z, + "u": ("x", np.linspace(20, 30, 2) * coord_unit), + "w": ("z", w), + }, + dims=("x", 
"z"), + ) + arr3 = xr.DataArray( + name="c", + data=array3, + coords={ + "y": np.arange(3, 6) * dim_unit, + "z": np.arange(4, 8) * dim_unit, + "v": ("y", np.linspace(10, 20, 3) * coord_unit), + "w": ("z", np.linspace(10, 20, 4) * coord_unit), + }, + dims=("y", "z"), + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([arr1, arr2, arr3]) + + return + + units = {name: original_unit for name in list("abcuvwxyz")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) + expected = attach_units( + func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), + units, + ) + result = func([arr1, arr2, arr3]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(11, 14) * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([ds1, ds2, ds3]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + ) + result = func([ds1, ds2, ds3]) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) -def test_replication(func, dtype): +def test_replication_dataarray(func, dtype): array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -330,8 +1002,33 @@ def test_replication(func, dtype): assert_equal_with_units(expected, result) +@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) +def test_replication_dataset(func, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * 
unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + numpy_func = getattr(np, func.__name__) + expected = ds.copy( + data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} + ) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( - reason="np.full_like on Variable strips the unit and pint does not allow mixed args" + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) ) @pytest.mark.parametrize( "unit,error", @@ -344,8 +1041,9 @@ def test_replication(func, dtype): pytest.param(unit_registry.ms, None, id="compatible_unit"), pytest.param(unit_registry.s, None, id="identical_unit"), ), + ids=repr, ) -def test_replication_full_like(unit, error, dtype): +def test_replication_full_like_dataarray(unit, error, dtype): array = np.linspace(0, 5, 10) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -360,6 +1058,163 @@ def test_replication_full_like(unit, error, dtype): assert_equal_with_units(expected, result) +@pytest.mark.xfail( + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.ms, None, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="identical_unit"), + ), + ids=repr, +) +def test_replication_full_like_dataset(unit, error, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + fill_value = -1 * unit + if error is not None: + with pytest.raises(error): + xr.full_like(ds, fill_value=fill_value) + + return + + expected = ds.copy( + data={ + name: np.full_like(array, fill_value=fill_value) + for name, array in ds.data_vars.items() + } + ) + result = xr.full_like(ds, fill_value=fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataarray(fill_value, unit, error, dtype): + array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + + x = xr.DataArray(data=array, dims="x") + cond = x < 5 * unit_registry.m + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + 
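Setting the units aside for a moment, the base semantics being exercised: ``xr.where(cond, x, y)`` keeps ``x`` wherever the condition holds and takes ``y`` elsewhere. A unitless sketch:

    import xarray as xr

    x = xr.DataArray([0.0, 1.0, 2.0], dims="t")

    # cond is [True, True, False], so the last slot takes the fill
    xr.where(x < 2, x, -1.0)  # values -> [0., 1., -1.]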
if error is not None: + with pytest.raises(error): + xr.where(cond, x, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(x), strip_units(fill_value_)), extract_units(x) + ) + result = xr.where(cond, x, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataset(fill_value, unit, error, dtype): + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m + x = np.arange(10) * unit_registry.s + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + cond = ds.x < 5 * unit_registry.s + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + if error is not None: + with pytest.raises(error): + xr.where(cond, ds, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(ds), strip_units(fill_value_)), extract_units(ds) + ) + result = xr.where(cond, ds, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="pint does not implement `np.einsum`") +def test_dot_dataarray(dtype): + array1 = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) + * unit_registry.m + / unit_registry.s + ) + array2 = ( + np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s + ) + + arr1 = xr.DataArray(data=array1, dims=("x", "y")) + arr2 = xr.DataArray(data=array2, dims=("y", "z")) + + expected = array1.dot(array2) + result = xr.dot(arr1, arr2) + + assert_equal_with_units(expected, result) + + class TestDataArray: @pytest.mark.filterwarnings("error:::pint[.*]") @pytest.mark.parametrize( @@ -1114,7 +1969,7 @@ def test_broadcast_equals(self, unit, dtype): dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, axis=1, ), - method("drop", labels="x"), + method("drop_sel", labels="x"), method("reset_coords", names="x2"), method("copy"), pytest.param( @@ -3190,7 +4045,7 @@ def test_reindex_like(self, unit, error, dtype): marks=pytest.mark.xfail(reason="strips units"), ), pytest.param( - method("apply", np.fabs), + method("map", np.fabs), marks=pytest.mark.xfail(reason="fabs strips units"), ), ), @@ -3365,7 +4220,7 @@ def test_grouped_operations(self, func, dtype): method("rename_dims", x="offset_x"), method("swap_dims", {"x": "x2"}), method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), - method("drop", labels="x"), + method("drop_sel", labels="x"), method("drop_dims", "z"), method("set_coords", names="c"), method("reset_coords", names="x2"), diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 528027ed149..d92a68729b5 100644 --- 
a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -542,6 +542,15 @@ def test_copy_index_with_data_errors(self): with raises_regex(ValueError, "must match shape of object"): orig.copy(data=new_data) + def test_replace(self): + var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) + result = var._replace() + assert_identical(result, var) + + new_data = np.arange(4).reshape(2, 2) + result = var._replace(data=new_data) + assert_array_equal(result.data, new_data) + def test_real_and_imag(self): v = self.cls("x", np.arange(3) - 1j * np.arange(3), {"foo": "bar"}) expected_re = self.cls("x", np.arange(3), {"foo": "bar"}) @@ -1477,6 +1486,10 @@ def test_reduce(self): with raises_regex(ValueError, "cannot supply both"): v.mean(dim="x", axis=0) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=True) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=False) def test_quantile(self): v = Variable(["x", "y"], self.d)
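For orientation, a sketch of what the new ``test_replace`` asserts; ``Variable._replace`` is internal API (note the leading underscore) that swaps out the given attributes and reuses the rest:

    import numpy as np
    from xarray.core.variable import Variable

    var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"})

    # with no arguments the result is an identical copy
    assert var._replace().identical(var)

    # replacing only the data keeps dims and attrs unchanged
    new = var._replace(data=np.arange(4).reshape(2, 2))
    assert new.dims == ("x", "y") and new.attrs == {"foo": "bar"}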