From 0db4f42f5c2d61cef732ffb557676f467fd538b0 Mon Sep 17 00:00:00 2001
From: Annette Stellema <40450353+stellema@users.noreply.github.com>
Date: Fri, 11 Oct 2024 16:06:12 +1100
Subject: [PATCH] Add eva main function and add eva.fit_gev fitstart options

---
 ci/environment.yml                            |    1 +
 .../worked_example-HadGEM3-GC31-MM.ipynb      |    8 +-
 docs/user_guide/worked_example_stationary.rst |    4 +-
 setup.py                                      |    1 +
 unseen/eva.py                                 | 1022 +++++++++++------
 unseen/moments.py                             |    4 +-
 unseen/stability.py                           |    2 +-
 unseen/tests/conftest.py                      |   59 +-
 unseen/tests/test_eva.py                      |  405 +++----
 9 files changed, 910 insertions(+), 596 deletions(-)

diff --git a/ci/environment.yml b/ci/environment.yml
index bd6f119..133d6dc 100644
--- a/ci/environment.yml
+++ b/ci/environment.yml
@@ -9,6 +9,7 @@ dependencies:
   - dask-jobqueue
   - geopandas
   - gitpython
+  - lmoments3
   - netcdf4
   - numpy
   - pip
diff --git a/docs/user_guide/worked_example-HadGEM3-GC31-MM.ipynb b/docs/user_guide/worked_example-HadGEM3-GC31-MM.ipynb
index 58984d3..5ee6550 100644
--- a/docs/user_guide/worked_example-HadGEM3-GC31-MM.ipynb
+++ b/docs/user_guide/worked_example-HadGEM3-GC31-MM.ipynb
@@ -1220,9 +1220,9 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "/home/599/dbi599/unseen/unseen/eva.py:253: UserWarning: Data fit failed. Retrying with 'generate_estimates=True'.\n",
+     "/home/599/dbi599/unseen/unseen/eva.py:253: UserWarning: Data fit failed. Retrying with 'fitstart='scipy_subset''.\n",
      "  warnings.warn(\n",
-     "/home/599/dbi599/unseen/unseen/eva.py:253: UserWarning: Data fit failed. Retrying with 'generate_estimates=True'.\n",
+     "/home/599/dbi599/unseen/unseen/eva.py:253: UserWarning: Data fit failed. Retrying with 'fitstart='scipy_subset''.\n",
      "  warnings.warn(\n"
     ]
    },
@@ -2609,12 +2609,12 @@
    ],
    "source": [
     "model_da_indep.plot.hist(bins=50, density=True, alpha=0.7, facecolor='tab:blue')\n",
-    "model_raw_shape, model_raw_loc, model_raw_scale = eva.fit_gev(model_da_indep_stacked.values, generate_estimates=True)\n",
+    "model_raw_shape, model_raw_loc, model_raw_scale = eva.fit_gev(model_da_indep_stacked.values, fitstart='scipy_subset')\n",
     "model_raw_pdf = gev.pdf(xvals, model_raw_shape, model_raw_loc, model_raw_scale)\n",
     "plt.plot(xvals, model_raw_pdf, color='tab:blue', linewidth=4.0, label='model')\n",
     "\n",
     "model_da_bc.plot.hist(bins=50, density=True, alpha=0.7, facecolor='tab:orange')\n",
-    "model_bc_shape, model_bc_loc, model_bc_scale = eva.fit_gev(model_da_bc_stacked.values, generate_estimates=True)\n",
+    "model_bc_shape, model_bc_loc, model_bc_scale = eva.fit_gev(model_da_bc_stacked.values, fitstart='scipy_subset')\n",
     "model_bc_pdf = gev.pdf(xvals, model_bc_shape, model_bc_loc, model_bc_scale)\n",
     "plt.plot(xvals, model_bc_pdf, color='tab:orange', linewidth=4.0, label='model (corrected)')\n",
     "\n",
diff --git a/docs/user_guide/worked_example_stationary.rst b/docs/user_guide/worked_example_stationary.rst
index 8f59a4e..8b6f241 100644
--- a/docs/user_guide/worked_example_stationary.rst
+++ b/docs/user_guide/worked_example_stationary.rst
@@ -438,12 +438,12 @@ to see the effect of the bias correction.
     model_da_bc_stacked = model_da_bc.dropna('lead_time').stack({'sample': ['ensemble', 'init_date', 'lead_time']})

     model_da_indep.plot.hist(bins=50, density=True, alpha=0.7, facecolor='tab:blue')
-    model_raw_shape, model_raw_loc, model_raw_scale = eva.fit_gev(model_da_indep_stacked.values, generate_estimates=True)
+    model_raw_shape, model_raw_loc, model_raw_scale = eva.fit_gev(model_da_indep_stacked.values, fitstart='scipy_subset')
     model_raw_pdf = gev.pdf(xvals, model_raw_shape, model_raw_loc, model_raw_scale)
     plt.plot(xvals, model_raw_pdf, color='tab:blue', linewidth=4.0, label='model')

     model_da_bc.plot.hist(bins=50, density=True, alpha=0.7, facecolor='tab:orange')
-    model_bc_shape, model_bc_loc, model_bc_scale = eva.fit_gev(model_da_bc_stacked.values, generate_estimates=True)
+    model_bc_shape, model_bc_loc, model_bc_scale = eva.fit_gev(model_da_bc_stacked.values, fitstart='scipy_subset')
     model_bc_pdf = gev.pdf(xvals, model_bc_shape, model_bc_loc, model_bc_scale)
     plt.plot(xvals, model_bc_pdf, color='tab:orange', linewidth=4.0, label='model (corrected)')

diff --git a/setup.py b/setup.py
index b217d55..96d229f 100644
--- a/setup.py
+++ b/setup.py
@@ -29,6 +29,7 @@
             "bias_correction = unseen.bias_correction:_main",
             "stability = unseen.stability:_main",
             "moments = unseen.moments:_main",
+            "eva = unseen.eva:_main",
         ]
     },
 )
diff --git a/unseen/eva.py b/unseen/eva.py
index 18f6481..5a745bd 100644
--- a/unseen/eva.py
+++ b/unseen/eva.py
@@ -1,5 +1,8 @@
 """Extreme value analysis functions."""

+import argparse
+from lmoments3 import distr
+import matplotlib.pyplot as plt
 from matplotlib import colormaps
 from matplotlib.dates import date2num
 from matplotlib.ticker import AutoMinorLocator
@@ -9,6 +12,11 @@
 from scipy.stats.distributions import chi2
 import warnings
 from xarray import apply_ufunc, DataArray
+import xclim.indices.stats as xcstats
+
+from . import fileio
+from . import general_utils
+from . import time_utils


 def event_in_context(data, threshold, direction):
@@ -48,35 +56,231 @@ def event_in_context(data, threshold, direction):
     return n_events, n_population, return_period, percentile


-def fit_stationary_gev(x, user_estimates=[], generate_estimates=False):
-    """Estimate stationary shape, location and scale parameters.
+def fit_gev(
+    data,
+    core_dim="time",
+    stationary=True,
+    covariate=None,
+    fitstart="LMM",
+    loc1=0,
+    scale1=0,
+    assert_good_fit=False,
+    pick_best_model=False,
+    alpha=0.05,
+    method="Nelder-Mead",
+):
+    """Estimate stationary or nonstationary GEV distribution parameters.

     Parameters
     ----------
-    x : array_like
-        Data to use in estimating the distribution parameters
-    user_estimates : list, optional
-        Initial guess of the shape, loc and scale parameters
-    generate_estimates : bool, optional
-        Generate initial parameter guesses using a data subset
+    data : array_like
+        Data to use in estimating the distribution parameters
+    core_dim : str, default "time"
+        Name of time/sample dimension in `data` and `covariate`
+    stationary : bool, default True
+        Fit as a stationary GEV (no trend in the location or scale parameters)
+    covariate : array_like, optional
+        A nonstationary covariate array with the same `core_dim` as `data`
+    fitstart : {array-like, 'LMM', 'scipy', 'scipy_fitstart', 'scipy_subset',
+        'xclim_fitstart', 'xclim'}, default 'LMM'
+        Initial guess method (or estimate) of the shape, loc and scale parameters
+    loc1, scale1 : float or None, default 0
+        Initial guess of trend parameters.
If None, the trend is fixed at zero + assert_good_fit : bool, default False + Stationary parameters must pass goodness of fit test at `alpha` level. + Attempt a retry and return NaNs if the test fails again. + pick_best_model : {False, 'lrt', 'aic', 'bic'}, default False + Method to test relative fit of stationary and nonstationary models. + Do not use if you don't want nonstationary parameters. The output will + have GEV 5 parameters even if stationary is True. + alpha : float, default 0.05 + Fit test p-value threshold for stationary fit (relative/goodness of fit) + method : {'Nelder-Mead', 'L-BFGS-B', 'TNC', 'SLSQP', 'Powell', + 'trust-constr', 'COBYLA'}, default 'Nelder-Mead' + Optimization method for nonstationary fit Returns ------- - shape, loc, scale : float - GEV parameters + dparams : xarray.DataArray + The GEV distribution parameters with the same dimensions as `data` + (excluding `core_dim`) and a new dimension `dparams`: + If stationary, dparams = (c, loc, scale). + If nonstationary, dparams = (c, loc0, loc1, scale0, scale1). + + Notes + ----- + - Use `unpack_gev_params` to get the shape, location and scale parameters + as a separate array. If nonstationary, the output will still be three + parameters that have an extra covariate dimension. + - For stationary data the parameters are estimated using + `scipy.stats.genextreme.fit`. + - For nonstationary data, the parameters (including the linear location and + scale trend parameters are estimated by minimising + a penalised negative log-likelihood function. + - The `assert_good_fit` option ensures that the distribution fit is + accepted if the goodness of fit test `p-value > alpha` (i.e., accept + the null hypothesis). It will retry the fit using data[::2] to generate + an initial guess. + - The `covariate` must be numeric and have dimensions aligned with `data`. + - If `pick_best_model` is a method, the relative goodness of fit method is + used to determine if stationary or nonstationary parameters are returned. + """ + kwargs = {k: v for k, v in locals().items() if k not in ["data", "covariate"]} + + def _assert_good_fit_1d(data, dparams, alpha, fit_kwargs): + """Test goodness of stationary GEV fit and retry if failed.""" + pvalue = check_gev_fit(data, dparams) + + if np.all(pvalue < alpha): + # Retry fit using alternative fitstart methods + warnings.warn("GEV fit failed. 
Retrying fitstart with data subset.") + _kwargs = fit_kwargs.copy() + _kwargs["fitstart"] = _fitstart_1d(data[::2], fitstart) + _kwargs["stationary"] = True + dparams = _fit_1d(data, covariate, **_kwargs) + pvalue = check_gev_fit(data, dparams) + + # Return NaNs if the test still fails + if np.all(pvalue < alpha): + # Return NaNs + dparams = dparams * np.nan + warnings.warn("Data fit failed.") + return dparams + + def _fit_1d( + data, + covariate, + stationary, + fitstart, + core_dim, + loc1, + scale1, + assert_good_fit, + pick_best_model, + alpha, + method, + ): + """Estimate distribution parameters.""" + if np.all(~np.isfinite(data)): + # Return NaNs if all input data is infinite + n = 3 if stationary else 5 + return np.array([np.nan] * n) + + if np.isnan(data).any(): + # Mask NaNs in data + mask = np.isfinite(data) + data = data[mask] + if not stationary: + covariate = covariate[mask] + + # Initial estimates of distribution parameters for MLE + if isinstance(fitstart, str): + dparams_i = _fitstart_1d(data, fitstart) + else: + # User provided initial estimates + dparams_i = fitstart + + # Use genextreme to get stationary distribution parameters + if stationary or pick_best_model: + if dparams_i is None: + dparams_i = genextreme.fit(data) + else: + dparams = genextreme.fit( + data, dparams_i[0], loc=dparams_i[1], scale=dparams_i[2] + ) + dparams = np.array([i for i in dparams], dtype="float64") + + if assert_good_fit: + dparams = _assert_good_fit_1d(data, dparams, alpha, kwargs) + + if not stationary or pick_best_model: + # Temporarily reverse shape sign (scipy uses different sign convention) + dparams_ns_i = [-dparams_i[0], dparams_i[1], loc1, dparams_i[2], scale1] + + # Optimisation bounds (scale parameter must be non-negative) + bounds = [(None, None)] * 5 + bounds[3] = (0, None) # Positive scale parameter + if loc1 is None: + dparams_ns_i[2] = 0 + bounds[2] = (0, 0) # Only allow trend in scale + if scale1 is None: + dparams_ns_i[4] = 0 + bounds[4] = (0, 0) # Only allow trend in location + + # Minimise the negative log-likelihood function to get optimal dparams + res = minimize( + nllf, + dparams_ns_i, + args=(data, covariate), + method=method, + bounds=bounds, + ) + dparams_ns = np.array([i for i in res.x], dtype="float64") + # Reverse shape sign for consistency with scipy.stats results + dparams_ns[0] *= -1 - if any(user_estimates): - shape, loc, scale = user_estimates - shape, loc, scale = genextreme.fit(x, shape, loc=loc, scale=scale) + # Stationary and nonstationary model relative goodness of fit + if pick_best_model: + dparams = get_best_GEV_model_1d( + data, dparams, dparams_ns, covariate, alpha, test=pick_best_model + ) + else: + dparams = dparams_ns + + return dparams - elif generate_estimates: - # Generate initial estimates using a data subset (useful for large datasets) - shape, loc, scale = genextreme.fit(x[::2]) - shape, loc, scale = genextreme.fit(x, shape, loc=loc, scale=scale) + if covariate is not None: + covariate = _format_covariate(data, covariate, core_dim) else: - shape, loc, scale = genextreme.fit(x) - return shape, loc, scale + covariate = 0 # Dummy covariate for apply_ufunc + + # Input core dimensions + if core_dim is not None and hasattr(covariate, core_dim): + # Covariate has the same core dimension as data + input_core_dims = [[core_dim], [core_dim]] + else: + # Covariate is a 1D array + input_core_dims = [[core_dim], []] + + n_params = 5 if (not stationary or pick_best_model) else 3 + # Fit data to distribution parameters + dparams = apply_ufunc( + _fit_1d, 
+ data, + covariate, + input_core_dims=input_core_dims, + output_core_dims=[["dparams"]], + vectorize=True, + dask="parallelized", + kwargs=kwargs, + output_dtypes=["float64"], + dask_gufunc_kwargs={"output_sizes": {"dparams": n_params}}, + ) + if isinstance(data, DataArray): + # Format output (consistent with xclim) + if n_params == 3: + dparams.coords["dparams"] = ["c", "loc", "scale"] + else: + dparams.coords["dparams"] = ["c", "loc0", "loc1", "scale0", "scale1"] + + # Add coordinates for the distribution parameters + dist_name = "genextreme" if stationary else "nonstationary genextreme" + if isinstance(fitstart, str): + estimator = fitstart.upper() + else: + estimator = f"User estimates = {fitstart}" + + dparams.attrs = dict( + long_name=f"{dist_name.capitalize()} parameters", + description=f"Parameters of the {dist_name} distribution", + method="MLE", + estimator=estimator, + scipy_dist="genextreme", + units="", + ) + + return dparams def penalised_sum(x): @@ -96,12 +300,12 @@ def penalised_sum(x): return total + penalty -def nllf(theta, x, covariate=None): +def nllf(dparams, x, covariate=None): """Penalised negative log-likelihood function. Parameters ---------- - theta : tuple of floats + dparams : tuple of floats Distribution parameters (stationary or non-stationary) x : array_like Data to use in estimating the distribution parameters @@ -118,39 +322,39 @@ def nllf(theta, x, covariate=None): This is modified version of `scipy.stats.genextreme.fit` for fitting extreme value distribution parameters, in which the location and scale parameters can vary linearly with a covariate. - The log-likelihood equations are based on Méndez et al. (2007). + The log-likelihood equations are based on Coles (2001; page 55). It is suitable for stationary or nonstationary distributions: - - theta = shape, loc, scale - - theta = shape, loc, loc1, scale, scale1 - The nonstationary parameters are returned if `theta` incudes the location + - dparams = shape, loc, scale + - dparams = shape, loc, loc1, scale, scale1 + The nonstationary parameters are returned if `dparams` incudes the location and scale trend parameters. A large finite penalty (instead of infinity) is applied for observations beyond the support of the distribution. The NLLF is not finite when the shape is nonzero and Z is negative because the PDF is zero (i.e., ``log(0)=inf)``). 
""" - if len(theta) == 5: + if len(dparams) == 5: # Nonstationary GEV parameters - shape, loc0, loc1, scale0, scale1 = theta + shape, loc0, loc1, scale0, scale1 = dparams loc = loc0 + loc1 * covariate scale = scale0 + scale1 * covariate else: # Stationary GEV parameters - shape, loc, scale = theta + shape, loc, scale = dparams s = (x - loc) / scale # Calculate the NLLF (type 1 or types 2-3 extreme value distributions) # Type I extreme value distributions (Gumbel) - if shape == 0: + if np.fabs(shape) < 1e-6: valid = scale > 0 L = np.log(scale, where=valid) + s + np.exp(-s) # Types II & III extreme value distributions (Fréchet and Weibull) else: Z = 1 + shape * s - # The log-likelihood function is finite when the shape and Z are positive + # The log-likelihood is finite when the shape and Z are positive valid = np.isfinite(Z) & (Z > 0) & (scale > 0) L = ( np.log(scale, where=valid) @@ -160,251 +364,114 @@ def nllf(theta, x, covariate=None): L = np.where(valid, L, np.inf) - # Sum function along all axes (where finite) & add penalty for each infinite element + # Sum function (where finite) & add penalty for each infinite element total = penalised_sum(L) return total -def fit_gev( - data, - core_dim="time", - stationary=True, - covariate=None, - loc1=0, - scale1=0, - test_fit_goodness=False, - relative_fit_test=None, - alpha=0.05, - user_estimates=[], - generate_estimates=False, - method="Nelder-Mead", -): - """Estimate stationary or nonstationary GEV distribution parameters. +def _fitstart_1d(data, method): + """Generate initial parameter guesses for nonstationary fit. Parameters ---------- data : array_like Data to use in estimating the distribution parameters - core_dim : str, optional - Name of time/sample dimension in `data`. Default: "time". - stationary : bool, optional - Fit as a stationary GEV using `fit_stationary_gev`. Default: True. - covariate : array_like or str, optional - A nonstationary covariate array or coordinate name - loc1, scale1 : float or None, optional - Initial guess of trend parameters. If None, the trend is fixed at zero. - test_fit_goodness : bool, optional - Test goodness of fit and attempt retry. Default False. - relative_fit_test : {None, 'lrt', 'aic', 'bic'}, optional - Method to test relative fit of stationary and nonstationary models. - The trend parameters are set to zero if the stationary fit is better. - alpha : float, optional - Goodness of fit p-value threshold. Default 0.05. - user estimates: list, optional - Initial guess of the shape, loc and scale parameters - generate_estimates : bool, optional - Generate initial parameter guesses using a data subset - method : str, optional - Optimization method for nonstationary fit {'Nelder-Mead', 'L-BFGS-B', - 'TNC', 'SLSQP', 'Powell', 'trust-constr', 'COBYLA'}. + method : {'LMM', 'scipy_fitstart', 'scipy', 'scipy_subset', + 'xclim_fitstart', 'xclim'} + Initial guess method of the shape, loc and scale parameters Returns ------- - theta : xr.DataArray - The GEV distribution parameters with the same dimensions as `data` - (excluding `core_dim`) and a new dimension `theta`: - If stationary, theta = (shape, loc, scale). - If nonstationary, theta = (shape, loc0, loc1, scale0, scale1). + dparams_i : list + Initial guess of the shape, loc and scale parameters Notes ----- - For stationary data the shape, location and scale parameters are - estimated using `gev_stationary_fit`. 
- For nonstationary data, the linear location and scale trend - parameters are estimated using a penalized negative log-likelihood - function with initial estimates based on the stationary fit. - The distribution fit is considered good if the p-value is above - `alpha` (i.e., accept the null hypothesis). Otherwise, it retry the fit - without `user_estimates` and with `generating_estimates`. - If data is a stacked forecast ensemble, the covariate may need to be - stacked in the same way. + - Use `scipy_fitstart` to reproduce the scipy fit in `fit_gev`. + - The LMM shape sign is reversed for consistency with scipy.stats results. """ - kwargs = locals() # Function inputs - - def _format_covariate(data, covariate, stationary, core_dim): - """Format or generate covariate .""" - if not stationary: - if isinstance(covariate, str): - # Select coordinate in data - covariate = data[covariate] - elif covariate is None: - # Guess covariate - if core_dim in data: - covariate = data[core_dim] - else: - covariate = np.arange(data.shape[0]) - - if covariate.dtype.kind not in set("buifc"): - # Convert dates to numbers - covariate = date2num(covariate) - - if not isinstance(covariate, DataArray): - # Convert to DataArray with the same core_dim as data - covariate = DataArray(covariate, dims=[core_dim]) - else: - covariate = 0 # Dummy covariate for apply_ufunc - - return covariate - - def _fit( - data, - covariate, - core_dim, - user_estimates, - generate_estimates, - loc1, - scale1, - stationary, - test_fit_goodness, - relative_fit_test, - alpha, - method, - ): - """Estimate distribution parameters.""" - if np.all(~np.isfinite(data)): - # Return NaNs if all input data is infinite - n = 3 if stationary else 5 - return np.array([np.nan] * n) - - if np.isnan(data).any(): - # Mask NaNs in data - mask = np.isfinite(data) - data = data[mask] - if not stationary: - covariate = covariate[mask] - # Use genextreme to get stationary distribution parameters - theta = fit_stationary_gev(data, user_estimates, generate_estimates) + if method == "LMM": + # L-moments method + dparams_i = distr.gev.lmom_fit(data) + dparams_i = list(dparams_i.values()) + dparams_i[0] = -dparams_i[0] + + elif method == "scipy_fitstart": + # Moments method? + dparams_i = genextreme._fitstart(data) + + elif method == "scipy": + # MLE + dparams_i = genextreme.fit(data) + + elif method == "scipy_subset": + # MLE (equivalent of fitstart='scipy_subet') + dparams_i = genextreme.fit(data[::2]) + + elif method == "xclim_fitstart": + # Approximates the probability weighted moments (PWM) method? 
+ args, kwargs = xcstats._fit_start(data, dist="genextreme") + dparams_i = [args[0], kwargs["loc"], kwargs["scale"]] + + elif method == "xclim": + # MLE + da = DataArray(data, dims="time") + dparams_i = xcstats.fit(da, "genextreme", method="MLE") + else: + raise ValueError(f"Unknown fitstart method: {method}") - if not stationary: - # Use genextreme as initial guesses - shape, loc, scale = theta - # Temporarily reverse shape sign (scipy uses different sign convention) - theta_i = [-shape, loc, loc1, scale, scale1] + return np.array(dparams_i, dtype="float64") - # Optimisation bounds (scale parameter must be non-negative) - bounds = [(None, None)] * len(theta_i) - bounds[3] = (0, None) # Positive scale parameter - if loc1 is None: - theta_i[2] = 0 - bounds[2] = (0, 0) # Only allow trend in scale - if scale1 is None: - theta_i[4] = 0 - bounds[4] = (0, 0) # Only allow trend in location - # Minimise the negative log-likelihood function to get optimal theta - res = minimize( - nllf, - theta_i, - args=(data, covariate), - method=method, - bounds=bounds, - ) - theta = res.x +def _format_covariate(data, covariate, core_dim): + """Format or generate covariate. - if isinstance(relative_fit_test, str): - # Test relative fit of stationary and nonstationary models - # Negative log likelihood using genextreme parameters - L1 = nllf([-shape, loc, scale], data) - L2 = res.fun + Parameters + ---------- + data : xarray.DataArray + Data to use in estimating the distribution parameters + covariate : array_like or str + A nonstationary covariate array or coordinate name + core_dim : str + Name of time/sample dimension in `data` - result = check_gev_relative_fit( - data, L1, L2, test=relative_fit_test, alpha=alpha - ) - if not result: - warnings.warn( - f"{relative_fit_test} test failed. Returning stationary parameters." - ) - # Return stationary parameters (genextreme.fit output) with no trend - theta = [shape, loc, 0, scale, 0] + Returns + ------- + covariate : xarray.DataArray + Covariate with the same core_dim as data + """ - # Reverse shape sign for consistency with scipy.stats results - theta[0] *= -1 - - theta = np.array([i for i in theta], dtype="float64") - - if test_fit_goodness and stationary: - pvalue = check_gev_fit(data, theta, core_dim=core_dim) - - # Accept null distribution of the Anderson-darling test (same distribution) - if np.all(pvalue < alpha): - if any(kwargs["user_estimates"]): - warnings.warn("GEV fit failed. Retrying without user_estimates.") - kwargs["user_estimates"] = [None, None, None] - theta = _fit(data, covariate, **kwargs) - elif not kwargs["generate_estimates"]: - warnings.warn( - "GEV fit failed. Retrying with generate_estimates=True." 
- ) - kwargs["generate_estimates"] = True # Also breaks loop - theta = _fit(data, covariate, **kwargs) - else: - # Return NaNs - theta = theta * np.nan - warnings.warn("Data fit failed.") - return theta - - data = kwargs.pop("data") - covariate = kwargs.pop("covariate") - covariate = _format_covariate(data, covariate, stationary, core_dim) + if isinstance(covariate, str): + # Select coordinate in data + covariate = data[covariate] - # Input core dimensions - if core_dim is not None and hasattr(covariate, core_dim): - # Covariate has the same core dimension as data - input_core_dims = [[core_dim], [core_dim]] - else: - # Covariate is a 1D array - input_core_dims = [[core_dim], []] + elif covariate is None: + # Guess covariate + if core_dim in data: + covariate = data[core_dim] + else: + covariate = np.arange(data.shape[0]) - # Expected output of theta - n = 3 if stationary else 5 + if covariate.dtype.kind not in set("buifc"): + # Convert dates to numbers + covariate = date2num(covariate) - # Fit data to distribution parameters - theta = apply_ufunc( - _fit, - data, - covariate, - input_core_dims=input_core_dims, - output_core_dims=[["theta"]], - vectorize=True, - dask="parallelized", - kwargs=kwargs, - output_dtypes=["float64"], - dask_gufunc_kwargs=dict(output_sizes={"theta": n}), - ) + if not isinstance(covariate, DataArray): + # Convert to DataArray with the same core_dim as data + covariate = DataArray(covariate, dims=[core_dim]) - # Format output - if len(data.shape) == 1: - # Return a tuple of scalars instead of a data array - theta = np.array([i for i in theta], dtype="float64") - - if isinstance(theta, DataArray): - if stationary: - coords = ["shape", "loc", "scale"] - else: - coords = ["shape", "loc0", "loc1", "scale0", "scale1"] - theta.coords["theta"] = coords - return theta + return covariate -def check_gev_fit(data, params, core_dim="time", **kwargs): +def check_gev_fit(data, dparams, core_dim=[], **kwargs): """Test stationary GEV distribution goodness of fit. 
Parameters ---------- data: array_like Data used to estimate the distribution parameters - params : tuple of floats + dparams : tuple of floats Shape, location and scale parameters core_dim : str, optional Data dimension to test over @@ -417,10 +484,10 @@ def check_gev_fit(data, params, core_dim="time", **kwargs): Goodness of fit p-value """ - def _goodness_of_fit(data, params, **kwargs): + def _goodness_of_fit(data, dparams, **kwargs): """Test GEV goodness of fit.""" # Stationary parameters - shape, loc, scale = params + shape, loc, scale = dparams res = goodness_of_fit( genextreme, @@ -430,11 +497,14 @@ def _goodness_of_fit(data, params, **kwargs): ) return res.pvalue + if not isinstance(core_dim, list): + core_dim = [core_dim] + pvalue = apply_ufunc( _goodness_of_fit, data, - params, - input_core_dims=[[core_dim], ["theta"]], + dparams, + input_core_dims=[core_dim, ["dparams"]], vectorize=True, kwargs=kwargs, dask="parallelized", @@ -452,7 +522,7 @@ def check_gev_relative_fit(data, L1, L2, test, alpha=0.05): Data to use in estimating the distribution parameters L1, L2 : float Negative log-likelihood of the stationary and nonstationary model - test : {"aic", "bic", "lrt"} + test : {"AIC", "BIC", "LRT"} Method to test relative fit of stationary and nonstationary models Returns @@ -484,15 +554,35 @@ def check_gev_relative_fit(data, L1, L2, test, alpha=0.05): # Bayesian Information Criterion (BIC) bic = [k * np.log(len(data)) + (2 * n) for n, k in zip([L1, L2], dof)] result = bic[0] > bic[1] + else: + raise ValueError("test must be 'LRT', 'AIC' or 'BIC'", test) return result -def unpack_gev_params(params, covariate=None): - """Unpack shape, loc, scale from params. +def get_best_GEV_model_1d(data, dparams, dparams_ns, covariate, alpha, test): + """Get the best GEV model based on a relative fit test.""" + # Calculate the stationary GEV parameters + shape, loc, scale = dparams + + # Negative log-likelihood of stationary and nonstationary models + L1 = nllf([-shape, loc, scale], data) + L2 = nllf([-dparams_ns[0], *dparams_ns[1:]], data, covariate) + + result = check_gev_relative_fit(data, L1, L2, test=test, alpha=alpha) + if not result: + # Return the stationary parameters with no trend + dparams = np.array([shape, loc, 0, scale, 0], dtype="float64") + else: + dparams = dparams_ns + return dparams + + +def unpack_gev_params(dparams, covariate=None): + """Unpack shape, loc, scale from dparams. Parameters ---------- - params : xarray.DataArray, list or tuple + dparams : xarray.DataArray, list or tuple Stationary or nonstationary GEV parameters covariate : xarray.DataArray, optional Covariate values for nonstationary parameters @@ -504,31 +594,37 @@ def unpack_gev_params(params, covariate=None): covariate. 
""" - if hasattr(params, "theta"): + if hasattr(dparams, "dparams"): # Select the correct dimension in a DataArray - params = [params.isel(theta=i) for i in range(params.theta.size)] + dparams = [dparams.isel(dparams=i) for i in range(dparams.dparams.size)] + elif not isinstance(dparams, (list, tuple)) and dparams.ndim > 1: + warnings.warn(f"Assuming parameters on axis=-1 (shape={dparams.shape})") + dparams = np.split(dparams, dparams.shape[-1], axis=-1) # Unpack GEV parameters - if len(params) == 3: + if len(dparams) == 3: # Stationary GEV parameters - shape, loc, scale = params + shape, loc, scale = dparams - elif len(params) == 5: + elif len(dparams) == 5: # Nonstationary GEV parameters - shape, loc0, loc1, scale0, scale1 = params + shape, loc0, loc1, scale0, scale1 = dparams loc = loc0 + loc1 * covariate scale = scale0 + scale1 * covariate + else: + raise ValueError("Expected 3 or 5 GEV parameters.", dparams) + return shape, loc, scale -def get_return_period(event, params=None, covariate=None, **kwargs): +def get_return_period(event, dparams=None, covariate=None, **kwargs): """Get return periods for a given events. Parameters ---------- event : float or array_like Event value(s) for which to calculate the return period - params : array_like, optional + dparams : array_like, optional Stationary or nonstationary GEV parameters covariate : array_like, optional Covariate values for nonstationary parameters @@ -540,10 +636,11 @@ def get_return_period(event, params=None, covariate=None, **kwargs): return_period : float or array_like Return period(s) for the event(s) """ - if params is None: - params = fit_gev(**kwargs) - shape, loc, scale = unpack_gev_params(params, covariate) + if dparams is None: + dparams = fit_gev(**kwargs) + + shape, loc, scale = unpack_gev_params(dparams, covariate) probability = apply_ufunc( genextreme.sf, @@ -559,14 +656,14 @@ def get_return_period(event, params=None, covariate=None, **kwargs): return 1.0 / probability -def get_return_level(return_period, params=None, covariate=None, **kwargs): +def get_return_level(return_period, dparams=None, covariate=None, **kwargs): """Get the return levels for given return periods. Parameters ---------- return_period : float or array_like Return period(s) for which to calculate the return level - params : array_like, optional + dparams : array_like, optional Stationary or nonstationary GEV parameters covariate : array_like, optional Covariate values for nonstationary parameters @@ -577,11 +674,18 @@ def get_return_level(return_period, params=None, covariate=None, **kwargs): ------- return_level : float or array_like Return level(s) of the given return period(s) + + Notes + ----- + If `return_period` is an ndarray, make sure dimensions are aligned with + `dparams`. For example, dparams dims=(lat, lon, dparams) and return_period + dims=(lat, lon, period). """ - if params is None: - params = fit_gev(**kwargs) - shape, loc, scale = unpack_gev_params(params, covariate) + if dparams is None: + dparams = fit_gev(**kwargs) + + shape, loc, scale = unpack_gev_params(dparams, covariate) return_level = apply_ufunc( genextreme.isf, @@ -603,34 +707,31 @@ def gev_return_curve( bootstrap_method="non-parametric", n_bootstraps=1000, max_return_period=4, - user_estimates=None, max_shape_ratio=None, + **fit_kwargs, ): """Return x and y data for a GEV return period curve. 
     Parameters
     ----------
-    data : xarray DataArray
+    data : xarray.DataArray
     event_value : float
         Magnitude of event of interest
     bootstrap_method : {'parametric', 'non-parametric'}, default "non-parametric"
     n_bootstraps : int, default 1000
     max_return_period : float, default 4
         The maximum return period is 10^{max_return_period}
-    user_estimates: list, default None
-        Initial estimates of the shape, loc and scale parameters
     max_shape_ratio: float, optional
-        Maximum bootstrap shape parameter to full population shape parameter ratio (e.g. 6.0)
-        Useful for filtering bad fits to bootstrap samples
+        Maximum bootstrap shape parameter to full population shape parameter
+        ratio (e.g. 6.0). Useful for filtering bad fits to bootstrap samples
+    fit_kwargs : dict, optional
+        Additional keyword arguments to pass to `fit_gev`
     """
+    rng = np.random.default_rng(seed=0)
     # GEV fit to data
-    if user_estimates:
-        shape, loc, scale = fit_gev(
-            data, user_estimates=user_estimates, stationary=True
-        )
-    else:
-        shape, loc, scale = fit_gev(data, generate_estimates=True, stationary=True)
+    dparams = fit_gev(data, **fit_kwargs)
+    shape, loc, scale = unpack_gev_params(dparams)

     curve_return_periods = np.logspace(0, max_return_period, num=10000)
     curve_probabilities = 1.0 / curve_return_periods
@@ -646,8 +747,8 @@
     if bootstrap_method == "parametric":
         boot_data = genextreme.rvs(shape, loc=loc, scale=scale, size=len(data))
     elif bootstrap_method == "non-parametric":
-        boot_data = np.random.choice(data, size=data.shape, replace=True)
-        boot_shape, boot_loc, boot_scale = fit_gev(boot_data, generate_estimates=True)
+        boot_data = rng.choice(data, size=data.shape, replace=True)
+        boot_shape, boot_loc, boot_scale = fit_gev(boot_data, fitstart="scipy_subset")
     if max_shape_ratio:
         shape_ratio = abs(boot_shape) / abs(shape)
         if shape_ratio > max_shape_ratio:
@@ -698,15 +799,15 @@
     ylabel=None,
     ylim=None,
     text=False,
-    user_estimates=None,
     max_shape_ratio=None,
+    **fit_kwargs,
 ):
     """Plot a single return period curve.

     Parameters
     ----------
     ax : matplotlib plot axis
-    data : xarray DataArray
+    data : xarray.DataArray
     event_value : float
         Magnitude of the event of interest
     direction : {'exceedance', 'deceedance'}, default 'exceedance'
@@ -721,11 +822,11 @@
         Limits for y-axis
     text : bool, default False
         Write the return period (and 95% CI) on the plot
-    user_estimates: list, default None
-        Initial estimates of the shape, loc and scale parameters
     max_shape_ratio: float, optional
         Maximum bootstrap shape parameter to full population shape parameter ratio (e.g. 6.0)
         Useful for filtering bad fits to bootstrap samples
+    fit_kwargs : dict, optional
+        Additional keyword arguments to pass to `fit_gev`
     """

     if direction == "deceedance":
@@ -737,8 +838,8 @@
         bootstrap_method=bootstrap_method,
         n_bootstraps=n_bootstraps,
         max_return_period=max_return_period,
-        user_estimates=user_estimates,
         max_shape_ratio=max_shape_ratio,
+        **fit_kwargs,
     )
     (
         curve_return_periods,
@@ -813,35 +914,53 @@
     ax.grid()


-def plot_nonstationary_pdfs(ax, data, theta_s, theta_ns, covariate):
+def plot_nonstationary_pdfs(
+    data,
+    dparams_s,
+    dparams_ns,
+    covariate,
+    ax=None,
+    title="",
+    units=None,
+    cmap="rainbow",
+    outfile=None,
+):
     """Plot stationary and nonstationary GEV PDFs.
Parameters ---------- - ax : matplotlib.Axes data : array-like Data to plot the histogram - theta_s : tuple of floats + dparams_s : tuple of floats Stationary GEV parameters (shape, loc, scale) - theta_ns : tuple or array-like + dparams_ns : tuple or array-like Nonstationary GEV parameters (shape, loc0, loc1, scale0, scale1) covariate : array-like - Covariates values in which to plot the nonstationary PDFs - + Covariate values in which to plot the nonstationary PDFs + ax : matplotlib.axes.Axes + title : str, optional + xlabel : str, optional + cmap : str, default "rainbow" + outfile : str, optional Returns ------- ax : matplotlib.Axes """ + if ax is None: + fig, ax = plt.subplots(1, 1, figsize=(10, 7)) + + ax.set_title(title, loc="left") + n = covariate.size - colors = colormaps["rainbow"](np.linspace(0, 1, n)) - shape, loc, scale = unpack_gev_params(theta_ns, covariate) + colors = colormaps[cmap](np.linspace(0, 1, n)) + shape, loc, scale = unpack_gev_params(dparams_ns, covariate) # Histogram. _, bins, _ = ax.hist(data, bins=40, density=True, alpha=0.5, label="Histogram") # Stationary GEV PDF - shape_s, loc_s, scale_s = theta_s + shape_s, loc_s, scale_s = dparams_s pdf_s = genextreme.pdf(bins, shape_s, loc=loc_s, scale=scale_s) ax.plot(bins, pdf_s, c="k", ls="--", lw=2.8, label="Stationary") @@ -850,64 +969,110 @@ def plot_nonstationary_pdfs(ax, data, theta_s, theta_ns, covariate): pdf_ns = genextreme.pdf(bins, shape, loc=loc[i], scale=scale[i]) ax.plot(bins, pdf_ns, lw=1.6, c=colors[i], zorder=0, label=t) + ax.set_xlabel(units) ax.set_ylabel("Probability") ax.xaxis.set_minor_locator(AutoMinorLocator()) ax.yaxis.set_minor_locator(AutoMinorLocator()) - ax.legend(bbox_to_anchor=(1, 1)) + ax.legend(loc="upper right", bbox_to_anchor=(1, 1), framealpha=0.3) + ax.set_xmargin(1e-3) + + if outfile: + plt.tight_layout() + plt.savefig(outfile, dpi=200, bbox_inches="tight") return ax def plot_nonstationary_return_curve( - ax, return_periods, theta_s, theta_ns, covariate, dim="time" + return_periods, + dparams_s, + dparams_ns, + covariate, + dim="time", + ax=None, + title="", + units=None, + cmap="rainbow", + outfile=None, ): """Plot stationary and nonstationary return period curves. 
Parameters ---------- - ax : matplotlib.Axes return_periods : array-like - Return periods to plot - theta_s : array-like or tuple of floats + Return periods to plot (x-axis) + dparams_s : array-like or tuple of floats Stationary GEV parameters (shape, loc, scale) - theta_ns : array-like or tuple of floats + dparams_ns : array-like or tuple of floats Nonstationary GEV parameters (shape, loc0, loc1, scale0, scale1) covariate : array-like Covariate values in which to show the nonstationary return levels dim : str, optional Covariate core dimension name, default "time" + ax : matplotlib.axes.Axes + title : str, optional + units : str, optional + cmap : str, default "rainbow" + outfile : str, optional Returns ------- - ax : matplotlib.Axes + ax : matplotlib.axes.Axes """ + if ax is None: + fig, ax = plt.subplots(1, 1, figsize=(9, 5)) + + ax.set_title(title, loc="left") + n = covariate.size - colors = colormaps["rainbow"](np.linspace(0, 1, n)) + colors = colormaps[cmap](np.linspace(0, 1, n)) # Stationary return levels - return_levels = get_return_level(return_periods, theta_s) - ax.plot( - return_periods, return_levels, label="Stationary", c="k", ls="--", zorder=n + 1 - ) + if dparams_s is not None: + return_levels = get_return_level(return_periods, dparams_s) + ax.plot( + return_periods, + return_levels, + label="Stationary", + c="k", + ls="--", + zorder=n + 1, + ) # Nonstationary return levels - return_levels = get_return_level(return_periods, theta_ns, covariate) + return_levels = get_return_level(return_periods, dparams_ns, covariate) for i, t in enumerate(covariate.values): ax.plot(return_periods, return_levels.isel({dim: i}), label=t, c=colors[i]) ax.set_xscale("log") - ax.set_xlabel("Return period [years]") + ax.set_ylabel(units) + ax.set_xlabel("Return period") ax.yaxis.set_minor_locator(AutoMinorLocator()) ax.set_xmargin(1e-2) ax.legend() + + if outfile: + plt.tight_layout() + plt.savefig(outfile, dpi=200, bbox_inches="tight") return ax -def plot_stacked_histogram(ax, dv1, dv2, bins, labels=None, dim="time"): - """Plot data binned by a covariate as a stacked histogram. +def plot_stacked_histogram( + dv1, + dv2, + bins=None, + labels=None, + ax=None, + title="", + units=None, + cmap="rainbow", + legend=True, + outfile=None, +): + """Histogram with data binned and stacked. 
Parameters ---------- - ax : matplotlib.Axes + dv1 : xarray.DataArray Data to plot in the histogram dv2 : xarray.DataArray @@ -915,27 +1080,48 @@ def plot_stacked_histogram(ax, dv1, dv2, bins, labels=None, dim="time"): bins : array-like Bin edges of dv2 labels : array-like, optional - Labels for each bin, default None (uses left side of each bin) - dim : str, default: "time" - Core dimension name of dv1 and dv2, default "time" + Labels for each bin, default None uses left side of each bin + dim : str, default "time" + Core dimension name of dv1 and dv2 + ax : matplotlib.axes.Axes + title : str, optional + units : str, optional + cmap : str, default "rainbow" + legend : bool, optional + outfile : str, optional Returns ------- - ax : matplotlib.Axes + ax : matplotlib.axes.Axes """ assert dv1.size == dv2.size + if bins is None or np.ndim(bins) == 0: + bins = np.histogram_bin_edges(dv2, bins) + + # Round bins to integers if possible + if np.all(np.diff(bins) >= 1): + bins = np.ceil(bins).astype(dtype=int) + + if labels is None: + # Labels show left side of each bin + # labels = bins[:-1] + labels = [f"{bins[i]}-{bins[i+1] - 1}" for i in range(len(bins) - 1)] + # Subset dv1 by bins dx_subsets = [ - dv1.where((dv2 >= bins[a]) & (dv2 < bins[a + 1])) for a in range(len(bins) - 1) + dv1.where(((dv2 >= bins[a]) & (dv2 < bins[a + 1])).values) + for a in range(len(bins) - 1) ] + dx_subsets[-1] = dv1.where((dv2 >= bins[-2]).values) - if labels is None: - # Labels show left side of each bin - labels = bins[:-1] + colors = colormaps[cmap](np.linspace(0, 1, len(bins) - 1)) - colors = colormaps["rainbow"](np.linspace(0, 1, len(bins) - 1)) + if ax is None: + fig, ax = plt.subplots(1, 1, figsize=(10, 7)) + + ax.set_title(title, loc="left") ax.hist( dx_subsets, density=True, @@ -945,6 +1131,202 @@ def plot_stacked_histogram(ax, dv1, dv2, bins, labels=None, dim="time"): edgecolor="k", label=labels, ) - ax.legend() + if legend: + ax.legend() + ax.set_xlabel(units) ax.set_ylabel("Probability") - return ax + + if outfile: + plt.tight_layout() + plt.savefig(outfile, dpi=200, bbox_inches="tight") + return ax, bins + + +def _parse_command_line(): + """Parse the command line for input arguments""" + + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument("file", type=str, help="Forecast file") + parser.add_argument("var", type=str, help="Variable name") + parser.add_argument("outfile", type=str, help="Output file") + parser.add_argument( + "--stack_dims", + type=str, + nargs="*", + default=["ensemble", "init_date", "lead_time"], + help="Dimensions to stack", + ) + parser.add_argument("--core_dim", type=str, default="time", help="Core dimension") + parser.add_argument( + "--stationary", + type=bool, + default=True, + help="Fit nonstationary GEV distribution", + ) + parser.add_argument( + "--fitstart", + default="LMM", + choices=( + "LMM", + "scipy", + "scipy_fitstart", + "scipy_subset", + "xclim_MLE", + "xclim", + ["shape", "loc", "scale"], + ), + help="Initial guess method (or estimate) of the GEV parameters", + ) + parser.add_argument( + "--assert_good_fit", + action="store_true", + default=False, + help="Test fit goodness", + ) + parser.add_argument( + "--pick_best_model", + type=str, + default=None, + help="Relative fit test to pick stationary or nonstationary parameters", + ) + parser.add_argument( + "--reference_time_period", + type=str, + nargs=2, + default=None, + help="Reference time period (start_date, end_date)", + ) + 
parser.add_argument( + "--covariate", type=str, default="time.year", help="Covariate variable" + ) + parser.add_argument( + "--covariate_file", type=str, default=None, help="Covariate file" + ) + parser.add_argument( + "--min_lead", default=None, help="Minimum lead time (int or filename)" + ) + parser.add_argument( + "--min_lead_kwargs", + type=str, + nargs="*", + default={}, + action=general_utils.store_dict, + help="Minimum lead time file", + ) + # parser.add_argument( + # "--confidence_interval", + # type=float, + # default=0.95, + # help="Confidence interval e.g., --confidence_interval 0.95", + # ) + parser.add_argument( + "--ensemble_dim", + type=str, + default="ensemble", + help="Name of ensemble member dimension", + ) + parser.add_argument( + "--init_dim", + type=str, + default="init_date", + help="Name of initial date dimension", + ) + parser.add_argument( + "--lead_dim", + type=str, + default="lead_time", + help="Name of lead time dimension", + ) + parser.add_argument( + "--output_chunks", + type=str, + nargs="*", + action=general_utils.store_dict, + default={}, + help="Output chunks", + ) + parser.add_argument( + "--dask_config", type=str, help="YAML file specifying dask client configuration" + ) + args = parser.parse_args() + + return args + + +def _main(): + """Run the command line program.""" + + args = _parse_command_line() + + ds = fileio.open_dataset(args.file, variables=[args.var]) + + if args.covariate_file is not None: + # Add covariate to dataset (to ensure all operations are aligned) + ds_covariate = fileio.open_dataset( + args.covariate_file, variables=[args.covariate] + ) + ds[args.covariate] = ds_covariate[args.covariate] + + # Filter data by reference time period + if args.reference_time_period: + ds = time_utils.select_time_period(ds, args.reference_time_period) + + # Filter data by minimum lead time + if args.min_lead: + if isinstance(args.min_lead, str): + # Load min_lead from file + ds_min_lead = fileio.open_dataset(args.min_lead, **args.min_lead_kwargs) + min_lead = ds_min_lead["min_lead"].load() + ds = ds.groupby(f"{args.init_dim}.month").where( + ds[args.lead_dim] >= min_lead + ) + ds = ds.drop_vars("month") + else: + ds = ds.where(ds[args.lead_dim] >= args.min_lead) + + # Stack dimensions along new "sample" dimension + if all([dim in ds[args.var].dims for dim in args.stack_dims]): + ds = ds.stack(**{"sample": args.stack_dims}) + args.core_dim = "sample" + + if not args.stationary: + covariate = _format_covariate(ds[args.var], ds[args.covariate], args.core_dim) + else: + covariate = None + + dparams = fit_gev( + ds[args.var], + core_dim=args.core_dim, + stationary=args.stationary, + fitstart=args.fitstart, + covariate=covariate, + assert_good_fit=args.assert_good_fit, + pick_best_model=args.pick_best_model, + ) + + # Format outfile + dparams = dparams.to_dataset() + + # Add the covariate variable + if not args.stationary or args.pick_best_model: + dparams[args.covariate] = covariate + + infile_logs = {args.file: ds.attrs["history"]} + if isinstance(args.min_lead, str): + infile_logs[args.min_lead] = ds_min_lead.attrs["history"] + dparams.attrs["history"] = fileio.get_new_log(infile_logs=infile_logs) + + if args.output_chunks: + dparams = dparams.chunk(args.output_chunks) + + if "zarr" in args.outfile: + fileio.to_zarr(dparams, args.outfile) + else: + dparams.to_netcdf(args.outfile, compute=True) + + +if __name__ == "__main__": + _main() diff --git a/unseen/moments.py b/unseen/moments.py index ae25070..2bb3fbf 100644 --- a/unseen/moments.py +++ 
b/unseen/moments.py @@ -183,7 +183,7 @@ def create_plot( random_sample = np.random.choice(da_fcst_stacked, sample_size) sample_moments = calc_moments( random_sample, - user_estimates=[ + fitstart=[ moments_fcst["GEV shape"], moments_fcst["GEV location"], moments_fcst["GEV scale"], @@ -196,7 +196,7 @@ def create_plot( bc_random_sample = np.random.choice(da_bc_fcst_stacked, sample_size) bc_sample_moments = calc_moments( bc_random_sample, - user_estimates=[ + fitstart=[ moments_fcst["GEV shape"], moments_fcst["GEV location"], moments_fcst["GEV scale"], diff --git a/unseen/stability.py b/unseen/stability.py index dba31ac..a9e4a4c 100644 --- a/unseen/stability.py +++ b/unseen/stability.py @@ -105,7 +105,7 @@ def return_curve(data, method, params=[], **kwargs): params : list, default None shape, location and scale parameters (calculated if None) kwargs : dict, optional - kwargs passed to eva.fit_gev (N.B. used to use generate_estimates=True) + kwargs passed to eva.fit_gev """ if method == "empirical": diff --git a/unseen/tests/conftest.py b/unseen/tests/conftest.py index ce6a6cd..25aef26 100644 --- a/unseen/tests/conftest.py +++ b/unseen/tests/conftest.py @@ -2,6 +2,7 @@ import dask.array as dsa import numpy as np import pytest +from scipy.stats import genextreme import xarray as xr @@ -101,7 +102,8 @@ def example_da_forecast(request): ) ) else: - data = np.random.random( + rng = np.random.default_rng(seed=0) + data = rng.random( ( len(init), len(lead), @@ -113,3 +115,58 @@ def example_da_forecast(request): return ds.assign_coords( {pytest.TIME_DIM: ([pytest.INIT_DIM, pytest.LEAD_DIM], time)} ) + + +@pytest.fixture() +def example_da_gev(request): + """An example 1D GEV DataArray and distribution parameters.""" + rng = np.random.default_rng(seed=0) + time = xr.cftime_range(start="2000-01-01", periods=1500, freq="D") + + # Shape, location and scale parameters + shape = rng.uniform(-0.5, 0.5) + loc = rng.uniform(-10, 10) + scale = rng.uniform(0.1, 10) + dparams = shape, loc, scale + + rvs = genextreme.rvs(shape, loc=loc, scale=scale, size=(time.size), random_state=0) + data = xr.DataArray(rvs, coords=[time], dims=[pytest.TIME_DIM]) + if request.param == "dask": + data = data.chunk({pytest.TIME_DIM: -1}) + elif request.param == "numpy": + data = data.values + return data, dparams + + +@pytest.fixture() +def example_da_gev_3d(request): + """An example 3D GEV DataArray and distribution parameters.""" + rng = np.random.default_rng(seed=0) + time = xr.cftime_range(start="2000-01-01", periods=1500, freq="D") + lat = np.arange(2) + lon = np.arange(2) + + # Shape, location and scale parameters + size = (len(lat), len(lon)) + shape = rng.uniform(-0.5, 0.5, size=size) + loc = rng.uniform(-10, 10, size=size) + scale = rng.uniform(0.1, 10, size=size) + dparams = np.stack([shape, loc, scale], axis=-1) + + rvs = genextreme.rvs( + shape, + loc=loc, + scale=scale, + size=(len(time), len(lat), len(lon)), + random_state=0, + ) + data = xr.DataArray( + rvs, + coords=[time, lat, lon], + dims=[pytest.TIME_DIM, pytest.LAT_DIM, pytest.LON_DIM], + ) + if request.param == "dask": + data = data.chunk({pytest.TIME_DIM: -1, pytest.LAT_DIM: 1, pytest.LON_DIM: 1}) + elif request.param == "numpy": + data = data.values + return data, dparams diff --git a/unseen/tests/test_eva.py b/unseen/tests/test_eva.py index bba533f..d30f13e 100644 --- a/unseen/tests/test_eva.py +++ b/unseen/tests/test_eva.py @@ -1,191 +1,102 @@ """Test extreme value analysis functions.""" -from matplotlib.dates import date2num import numpy as np import 
numpy.testing as npt -from scipy.stats import genextreme -from xarray import cftime_range, DataArray +import pytest +import xarray as xr from unseen.eva import fit_gev, get_return_period, get_return_level -rtol = 0.3 # relative tolerance -alpha = 0.05 - - -def example_da_gev_1d(): - """An example 1D GEV DataArray and distribution parameters.""" - time = cftime_range(start="2000-01-01", periods=1500, freq="D") - - # Shape, location and scale parameters - np.random.seed(0) - shape = np.random.uniform() - loc = np.random.uniform(-10, 10) - scale = np.random.uniform(0.1, 10) - theta = shape, loc, scale - - rvs = genextreme.rvs(shape, loc=loc, scale=scale, size=(time.size), random_state=0) - data = DataArray(rvs, coords=[time], dims=["time"]) - return data, theta - - -def example_da_gev_1d_dask(): - """An example 1D GEV dask array and distribution parameters.""" - data, theta = example_da_gev_1d() - data = data.chunk({"time": -1}) - return data, theta - - -def example_da_gev_3d(): - """An example 3D GEV DataArray and distribution parameters.""" - time = cftime_range(start="2000-01-01", periods=1500, freq="D") - lat = np.arange(2) - lon = np.arange(2) - - # Shape, location and scale parameters - size = (len(lat), len(lon)) - np.random.seed(0) - shape = np.random.uniform(size=size) - loc = np.random.uniform(-10, 10, size=size) - scale = np.random.uniform(0.1, 10, size=size) - theta = np.stack([shape, loc, scale], axis=-1) - - rvs = genextreme.rvs( - shape, - loc=loc, - scale=scale, - size=(len(time), len(lat), len(lon)), - random_state=0, - ) - data = DataArray(rvs, coords=[time, lat, lon], dims=["time", "lat", "lon"]) - return data, theta - - -def example_da_gev_3d_dask(): - """An example 3D GEV dask array and its distribution parameters.""" - data, theta = example_da_gev_3d() - data = data.chunk({"time": -1, "lat": 1, "lon": 1}) - return data, theta +rtol = 0.3 # relative tolerance for testing close values def add_example_gev_trend(data): trend = np.arange(data.time.size) * 2.5 / data.time.size - trend = DataArray(trend, coords={"time": data.time}) + trend = xr.DataArray(trend, coords={"time": data.time}) return data + trend -def example_da_gev_forecast(): - """Create example stacked forecast dataArray.""" - ensemble = np.arange(3) - lead_time = np.arange(5) - init_date = cftime_range(start="2000-01-01", periods=24, freq="MS") - time = [ - init_date.shift(i, freq="MS")[: len(lead_time)] for i in range(len(init_date)) - ] - - # Generate shape, location and scale parameters. 
- np.random.seed(2) - shape = np.random.uniform() - loc = np.random.uniform(-10, 10) - scale = np.random.uniform(0.1, 10) - theta = shape, loc, scale - - rvs = genextreme.rvs( - shape, - loc=loc, - scale=scale, - size=(len(ensemble), len(init_date), len(lead_time)), - random_state=0, +@pytest.mark.parametrize("example_da_gev", ["xarray", "numpy", "dask"], indirect=True) +def test_fit_gev_1d(example_da_gev): + """Run stationary GEV fit using 1D array.""" + data, dparams_i = example_da_gev + dparams = fit_gev( + data, stationary=True, assert_good_fit=False, pick_best_model=False ) - data = DataArray( - rvs, - coords=[ensemble, init_date, lead_time], - dims=["ensemble", "init_date", "lead_time"], - ) - data = data.assign_coords({"time": (["init_date", "lead_time"], time)}) - data_stacked = data.stack({"sample": ["ensemble", "init_date", "lead_time"]}) - return data_stacked, theta - - -def test_fit_gev_1d(): - """Run stationary fit using 1D array & check results.""" - data, theta_i = example_da_gev_1d() - theta = fit_gev(data, stationary=True) - # Check fitted params match params used to create data - npt.assert_allclose(theta, theta_i, rtol=rtol) - - -def test_fit_gev_1d_user_estimates(): - """Run stationary fit using 1D array & user_estimates.""" - data, theta_i = example_da_gev_1d() - user_estimates = list(theta_i) - theta = fit_gev(data, stationary=True, user_estimates=user_estimates) # Check fitted params match params used to create data - npt.assert_allclose(theta, theta_i, rtol=rtol) - - -def test_fit_gev_1d_goodness(): - """Run stationary fit using 1D array & fit_goodness_test.""" - data, theta_i = example_da_gev_1d() - theta = fit_gev(data, stationary=True, test_fit_goodness=True) - # Check fitted params match params used to create data - npt.assert_allclose(theta, theta_i, rtol=rtol) - - -def test_fit_gev_1d_numpy(): - """Run stationary fit using 1D np.ndarray & check results.""" - data, theta_i = example_da_gev_1d() - data = data.values - theta = fit_gev(data, stationary=True) - # Check fitted params match params used to create data - npt.assert_allclose(theta, theta_i, rtol=rtol) - - -def test_fit_gev_1d_dask(): - """Run stationary fit using 1D dask array & check results.""" - data, theta_i = example_da_gev_1d_dask() - theta = fit_gev(data, stationary=True, core_dim="time") + npt.assert_allclose(dparams, dparams_i, rtol=rtol) + + +@pytest.mark.parametrize("example_da_gev", ["xarray", "numpy", "dask"], indirect=True) +@pytest.mark.parametrize( + "fitstart", + [ + [1, -5, 1], + "LMM", + "scipy_fitstart", + "scipy", + "scipy_subset", + "xclim_fitstart", + "xclim", + ], +) +def test_fit_gev_1d_fitstart(example_da_gev, fitstart): + """Run stationary GEV fit using 1D array & fitstart method.""" + data, dparams_i = example_da_gev + dparams = fit_gev( + data, + stationary=True, + fitstart=fitstart, + assert_good_fit=False, + pick_best_model=False, + ) # Check fitted params match params used to create data - npt.assert_allclose(theta, theta_i, rtol=rtol) + npt.assert_allclose(dparams, dparams_i, rtol=rtol) -def test_fit_gev_3d(): - """Run stationary fit using 3D array & check results.""" - data, theta_i = example_da_gev_3d() - theta = fit_gev(data, stationary=True, core_dim="time") +@pytest.mark.parametrize("example_da_gev", ["xarray", "numpy", "dask"], indirect=True) +def test_fit_gev_1d_assert_good_fit(example_da_gev): + """Run stationary GEV fit using 1D array & fit_goodness_test.""" + data, dparams_i = example_da_gev + dparams = fit_gev(data, stationary=True, assert_good_fit=True) # 
Check fitted params match params used to create data - npt.assert_allclose(theta, theta_i, rtol=rtol) + npt.assert_allclose(dparams, dparams_i, rtol=0.3) -def test_fit_gev_3d_dask(): - """Run stationary fit using 3D dask array & check results.""" - data, theta_i = example_da_gev_3d_dask() - theta = fit_gev(data, stationary=True, core_dim="time") +# todo FAILED unseen/tests/test_eva.py::test_fit_gev_3d[xarray] - AssertionError: +@pytest.mark.parametrize("example_da_gev_3d", ["xarray", "dask"], indirect=True) +def test_fit_gev_3d(example_da_gev_3d): + """Run stationary GEV fit using 3D array & check results.""" + data, dparams_i = example_da_gev_3d + dparams = fit_gev(data, stationary=True, fitstart="LMM", core_dim="time") # Check fitted params match params used to create data - npt.assert_allclose(theta, theta_i, rtol=rtol) + npt.assert_allclose(dparams, dparams_i, rtol=0.4) -def test_fit_ns_gev_1d(): - """Run non-stationary fit using 1D array & check results.""" - data, _ = example_da_gev_1d() +@pytest.mark.parametrize("example_da_gev", ["xarray", "dask"], indirect=True) +def test_fit_ns_gev_1d(example_da_gev): + """Run non-stationary GEV fit using 1D array & check results.""" + data, _ = example_da_gev data = add_example_gev_trend(data) - covariate = np.arange(data.time.size, dtype=int) + covariate = xr.DataArray(np.arange(data.time.size), dims="time") - theta = fit_gev( + dparams = fit_gev( data, stationary=False, core_dim="time", covariate=covariate, ) - assert np.all(theta[2] > 0) # Positive trend in location + assert np.all(dparams[2] > 0) # Positive trend in location -def test_fit_ns_gev_1d_loc_only(): - """Run non-stationary fit using 1D array (location parameter only).""" - data, _ = example_da_gev_1d() +@pytest.mark.parametrize("example_da_gev", ["xarray", "dask"], indirect=True) +def test_fit_ns_gev_1d_loc_only(example_da_gev): + """Run non-stationary GEV fit using 1D array (location parameter only).""" + data, _ = example_da_gev data = add_example_gev_trend(data) - covariate = np.arange(data.time.size, dtype=int) + covariate = xr.DataArray(np.arange(data.time.size), dims="time") - theta = fit_gev( + dparams = fit_gev( data, stationary=False, core_dim="time", @@ -193,17 +104,18 @@ def test_fit_ns_gev_1d_loc_only(): scale1=None, covariate=covariate, ) - assert np.all(theta[2] > 0) # Positive trend in location - assert np.all(theta[4] == 0) # No trend in scale + assert np.all(dparams[2] > 0) # Positive trend in location + assert np.all(dparams[4] == 0) # No trend in scale -def test_fit_ns_gev_1d_scale_only(): - """Run non-stationary fit using 1D array (scale parameter only).""" - data, _ = example_da_gev_1d() +@pytest.mark.parametrize("example_da_gev", ["xarray"], indirect=True) +def test_fit_ns_gev_1d_scale_only(example_da_gev): + """Run non-stationary GEV fit using 1D array (scale parameter only).""" + data, _ = example_da_gev data = add_example_gev_trend(data) - covariate = np.arange(data.time.size, dtype=int) + covariate = xr.DataArray(np.arange(data.time.size), dims="time") - theta = fit_gev( + dparams = fit_gev( data, stationary=False, core_dim="time", @@ -211,158 +123,119 @@ def test_fit_ns_gev_1d_scale_only(): scale1=0, covariate=covariate, ) - assert np.all(theta[2] == 0) # No trend in location - assert np.all(theta[4] != 0) # Nonzero trend in scale - - -def test_fit_ns_gev_1d_dask(): - """Run non-stationary fit using 1D dask array & check results.""" - data, _ = example_da_gev_1d_dask() - # Add a positive linear trend - data = add_example_gev_trend(data) - covariate = 
-    theta = fit_gev(data, stationary=False, covariate=covariate, core_dim="time")
-    assert np.all(theta[2] > 0)  # Positive trend in location
+    assert np.all(dparams[2] == 0)  # No trend in location
+    assert np.all(dparams[4] != 0)  # Nonzero trend in scale


-def test_fit_ns_gev_3d():
-    """Run non-stationary fit using 3D array & check results."""
-    data, _ = example_da_gev_3d()
+@pytest.mark.parametrize("example_da_gev_3d", ["xarray", "dask"], indirect=True)
+def test_fit_ns_gev_3d(example_da_gev_3d):
+    """Run non-stationary GEV fit using 3D array & check results."""
+    data, _ = example_da_gev_3d
     # Add a positive linear trend
     data = add_example_gev_trend(data)
-    covariate = np.arange(data.time.size, dtype=int)
-    theta = fit_gev(data, stationary=False, covariate=covariate, core_dim="time")
-    assert np.all(theta.isel(theta=2) > 0)  # Positive trend in location
+    covariate = xr.DataArray(np.arange(data.time.size), dims="time")
+    dparams = fit_gev(data, stationary=False, covariate=covariate, core_dim="time")
+    assert np.all(dparams.isel(dparams=2) > 0)  # Positive trend in location


-def test_fit_ns_gev_1d_relative_fit_test_bic_trend():
-    """Run non-stationary fit & check 'BIC' test returns nonstationary params."""
-    data, _ = example_da_gev_1d()
+@pytest.mark.parametrize("example_da_gev", ["xarray"], indirect=True)
+def test_fit_ns_gev_1d_pick_best_model_bic_trend(example_da_gev):
+    """Run non-stationary GEV fit & check 'BIC' test returns nonstationary params."""
+    data, _ = example_da_gev
     # Add a large positive linear trend
     data = add_example_gev_trend(data)
     data = add_example_gev_trend(data)
-    covariate = np.arange(data.time.size, dtype=int)
+    covariate = xr.DataArray(np.arange(data.time.size), dims="time")

-    theta = fit_gev(
+    dparams = fit_gev(
         data,
         stationary=False,
         core_dim="time",
         covariate=covariate,
-        relative_fit_test="bic",
+        pick_best_model="bic",
     )
-    assert np.all(theta[2] > 0)  # Positive trend in location
+    assert np.all(dparams[2] > 0)  # Positive trend in location


-def test_fit_ns_gev_1d_relative_fit_test_bic_no_trend():
-    """Run non-stationary fit & check 'BIC' test returns stationary params."""
-    data, _ = example_da_gev_1d()
-    covariate = np.arange(data.time.size, dtype=int)
+@pytest.mark.parametrize("example_da_gev", ["xarray"], indirect=True)
+def test_fit_ns_gev_1d_pick_best_model_bic_no_trend(example_da_gev):
+    """Run non-stationary GEV fit & check 'BIC' test returns stationary params."""
+    data, _ = example_da_gev
+    covariate = xr.DataArray(np.arange(data.time.size), dims="time")

-    theta = fit_gev(
+    dparams = fit_gev(
         data,
         stationary=False,
         core_dim="time",
         covariate=covariate,
-        relative_fit_test="bic",
+        pick_best_model="bic",
     )
-    assert np.all(theta[2] == 0)  # No trend in location
-    assert np.all(theta[4] == 0)  # No trend in scale
-
-
-def test_fit_ns_gev_3d_dask():
-    """Run non-stationary fit using 3D dask array & check results."""
-    data, _ = example_da_gev_3d_dask()
-    # Add a positive linear trend
-    data = add_example_gev_trend(data)
-    covariate = np.arange(data.time.size, dtype=int)
-    theta = fit_gev(data, stationary=False, covariate=covariate, core_dim="time")
-    assert np.all(theta.isel(theta=2) > 0)  # Positive trend in location
+    assert np.all(dparams[2] == 0)  # No trend in location
+    assert np.all(dparams[4] == 0)  # No trend in scale


-def test_fit_ns_gev_forecast():
-    """Run non-stationary fit using stacked forecast dataArray."""
-    data, _ = example_da_gev_forecast()
-    # Convert times to numerical timesteps
-    covariate = DataArray(date2num(data.time), coords={"sample": data.sample})
-    # Add a positive linear trend
-    trend = covariate / 1e2
-    data = data + trend
-    data = data.sortby(data.time)
-    covariate = covariate.sortby(data.time)
-    theta = fit_gev(data, stationary=False, covariate=covariate, core_dim="sample")
-    assert np.all(theta[2] > 0)  # Positive trend in location
-
-
-def test_get_return_period():
+@pytest.mark.parametrize("example_da_gev", ["xarray", "numpy", "dask"], indirect=True)
+def test_get_return_period(example_da_gev):
     """Run get_return_period for a single event using 1d data."""
-    data, _ = example_da_gev_1d()
-    event = data.mean()
-    rp = get_return_period(event, data=data)
-    assert rp.size == 1
-    assert np.all(np.isfinite(rp))
+    data, _ = example_da_gev
+    event = np.mean(data)
+    ari = get_return_period(event, data=data)
+    assert ari.size == 1
+    assert np.all(np.isfinite(ari))


-def test_get_return_period_1d():
-    """Run get_return_period for 1d array of events using 1d data."""
-    data, theta = example_da_gev_1d()
-    event = data.quantile([0.25, 0.5, 0.75], dim="time")
-    rp = get_return_period(event, theta)
-    assert rp.shape == event.shape
-    assert np.all(np.isfinite(rp))
-
-
-def test_get_return_period_3d():
+@pytest.mark.parametrize("example_da_gev_3d", ["xarray", "dask"], indirect=True)
+def test_get_return_period_3d(example_da_gev_3d):
     """Run get_return_period for 3d array of events using 3d data."""
-    data, theta = example_da_gev_3d()
-    theta = fit_gev(data, stationary=True)
+    data, dparams = example_da_gev_3d
+    dparams = fit_gev(data, stationary=True)
     # Multiple events unique to each lat/lon
     event = data.quantile([0.25, 0.5, 0.75], dim="time")
-    rp = get_return_period(event, theta)
-    assert rp.shape == event.shape
-    assert np.all(np.isfinite(rp))
+    ari = get_return_period(event, dparams)
+    assert ari.shape == event.shape
+    assert np.all(np.isfinite(ari))


-def test_get_return_period_3d_nonstationary():
+@pytest.mark.parametrize("example_da_gev_3d", ["xarray", "dask"], indirect=True)
+def test_get_return_period_3d_nonstationary(example_da_gev_3d):
     """Run get_return_period for 3d events using 3d nonstationary data."""
-    data, _ = example_da_gev_3d()
+    data, _ = example_da_gev_3d
     data = add_example_gev_trend(data)
-    covariate = DataArray(np.arange(data.time.size), dims="time")
+    covariate = xr.DataArray(np.arange(data.time.size), dims="time")
     params = fit_gev(data, stationary=False, covariate=covariate, core_dim="time")
     # Multiple events unique to each lat/lon
     event = data.quantile([0.25, 0.5, 0.75], dim="time")
-    covariate_subset = DataArray([0, covariate.size], dims="time")
-    rp = get_return_period(event, params, covariate=covariate_subset)
-    assert rp.shape == (*list(event.shape), covariate_subset.size)
-    assert np.all(np.isfinite(rp))
-
-
-def test_get_return_level():
-    """Run get_return_level for a single return_period using 1d data."""
-    _, theta = example_da_gev_1d()
-    rp = 100
-    return_level = get_return_level(rp, theta)
-    assert return_level.size == 1
-    assert np.all(np.isfinite(return_level))
+    covariate_subset = xr.DataArray([0, covariate.size], dims="time")
+    ari = get_return_period(event, params, covariate=covariate_subset)
+    assert ari.shape == (*list(event.shape), covariate_subset.size)
+    assert np.all(np.isfinite(ari))


-def test_get_return_level_1d():
+@pytest.mark.parametrize("example_da_gev", ["xarray", "numpy", "dask"], indirect=True)
+@pytest.mark.parametrize("ari", [100, np.array([10, 100, 1000])])
+def test_get_return_level(example_da_gev, ari):
     """Run get_return_level for 1d array of periods using 1d data."""
-    _, theta = example_da_gev_1d()
-    rp = np.array([10, 100, 1000])
-    return_level = get_return_level(rp, theta)
-    assert return_level.shape == rp.shape
+    _, dparams = example_da_gev
+    return_level = get_return_level(ari, dparams)
+    if isinstance(ari, int):
+        assert return_level.size == 1
+    else:
+        assert return_level.shape == ari.shape
     assert np.all(np.isfinite(return_level))


-def test_get_return_level_3d():
+@pytest.mark.parametrize("example_da_gev_3d", ["xarray", "dask"], indirect=True)
+def test_get_return_level_3d(example_da_gev_3d):
     """Run get_return_level for 3d array of periods using 3d data."""
-    data, theta = example_da_gev_3d()
-    theta = fit_gev(data, stationary=True)
+    data, dparams = example_da_gev_3d
+    dparams = fit_gev(data, stationary=True, core_dim="time")
+
     # Multiple events unique to each lat/lon
-    dims = ("return_period", "lat", "lon")
-    rp = np.array([10, 100, 1000] * 4).T
-    rp = DataArray(rp.reshape((3, 2, 2)), dims=dims)
-    return_level = get_return_level(rp, theta)
-    assert return_level.shape == rp.shape
+    dims = ("lat", "lon", "return_period")
+    ari = np.array([10, 100, 1000] * 4).T
+    ari = xr.DataArray(ari.reshape(dparams.shape), dims=dims)
+    return_level = get_return_level(ari, dparams)
+
+    assert return_level.shape == ari.shape
    assert np.all(np.isfinite(return_level))
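Usage note (illustrative only, not part of the patch): the snippet below is a minimal sketch of how the API exercised by these tests might be called directly. It assumes `fit_gev`, `get_return_level` and `get_return_period` are importable from `unseen.eva` as in the tests above; the synthetic `genextreme` sample and its parameter values are arbitrary stand-ins for real block-maxima data.

    import numpy as np
    from scipy.stats import genextreme
    from unseen.eva import fit_gev, get_return_level, get_return_period

    # Synthetic GEV-distributed block maxima (parameters chosen purely for illustration)
    data = genextreme.rvs(0.3, loc=5, scale=2, size=500, random_state=0)

    # Stationary fit; "LMM" requests L-moments-based initial parameter estimates
    dparams = fit_gev(data, stationary=True, fitstart="LMM")

    # Return level of the 1-in-100 event, and return period of the sample mean
    level_100 = get_return_level(100, dparams)
    ari = get_return_period(np.mean(data), data=data)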