diff --git a/NEWS.md b/NEWS.md index 1f53c57f1..50808d8f0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,13 +15,16 @@ the CSV. (#394) troubleshooting, debugging and development. See end of *How does CmdStanR work?* vignette for details. (#392) -* New `loo()` method for CmdStanMCMC objects. Requires computing pointwise +* New `$loo()` method for CmdStanMCMC objects. Requires computing pointwise log-likelihood in Stan program. (#366) * The `fitted_params` argument to the `$generate_quantities()` method now also accepts CmdStanVB, `posterior::draws_array`, and `posterior::draws_matrix` objects. (#390) +* The `$optimize()` method now supports all of CmdStan's tolerance-related +arguments for (L)BFGS. (#398) + # cmdstanr 0.2.2 ### Bug fixes diff --git a/R/args.R b/R/args.R index 1f6b9a1cb..aa03b1068 100644 --- a/R/args.R +++ b/R/args.R @@ -343,12 +343,24 @@ OptimizeArgs <- R6::R6Class( lock_objects = FALSE, public = list( method = "optimize", - initialize = function(algorithm = NULL, + initialize = function(iter = NULL, + algorithm = NULL, init_alpha = NULL, - iter = NULL) { + tol_obj = NULL, + tol_rel_obj = NULL, + tol_grad = NULL, + tol_rel_grad = NULL, + tol_param = NULL, + history_size = NULL) { self$algorithm <- algorithm - self$init_alpha <- init_alpha self$iter <- iter + self$init_alpha <- init_alpha + self$tol_obj <- tol_obj + self$tol_rel_obj <- tol_rel_obj + self$tol_grad <- tol_grad + self$tol_rel_grad <- tol_rel_grad + self$tol_param <- tol_param + self$history_size <- history_size invisible(self) }, validate = function(num_procs) { @@ -365,9 +377,15 @@ OptimizeArgs <- R6::R6Class( } new_args <- list( "method=optimize", + .make_arg("iter"), .make_arg("algorithm"), .make_arg("init_alpha"), - .make_arg("iter") + .make_arg("tol_obj"), + .make_arg("tol_rel_obj"), + .make_arg("tol_grad"), + .make_arg("tol_rel_grad"), + .make_arg("tol_param"), + .make_arg("history_size") ) new_args <- do.call(c, new_args) c(args, new_args) @@ -577,14 +595,33 @@ validate_sample_args <- 
function(self, num_procs) { validate_optimize_args <- function(self) { checkmate::assert_subset(self$algorithm, empty.ok = TRUE, choices = c("bfgs", "lbfgs", "newton")) - checkmate::assert_integerish(self$iter, lower = 0, null.ok = TRUE, len = 1) + checkmate::assert_integerish(self$iter, lower = 1, null.ok = TRUE, len = 1) if (!is.null(self$iter)) { self$iter <- as.integer(self$iter) } - checkmate::assert_number(self$init_alpha, lower = 0, null.ok = TRUE) - if (!is.null(self$init_alpha) && isTRUE(self$algorithm == "newton")) { - stop("'init_alpha' can't be used when algorithm is 'newton'.", - call. = FALSE) + + # check args only available for lbfgs and bfgs + bfgs_args <- c("init_alpha", "tol_obj", "tol_rel_obj", "tol_grad", "tol_rel_grad", "tol_param") + for (arg in bfgs_args) { + # check that arg is positive or NULL and that algorithm='lbfgs' or 'bfgs' is + # explicitly specified (error if not or if 'newton') + if (!is.null(self[[arg]]) && is.null(self$algorithm)) { + stop("Please specify 'algorithm' in order to use '", arg, "'.", call. = FALSE) + } + if (!is.null(self[[arg]]) && isTRUE(self$algorithm == "newton")) { + stop("'", arg, "' can't be used when algorithm is 'newton'.", call. = FALSE) + } + checkmate::assert_number(self[[arg]], .var.name = arg, lower = 0, null.ok = TRUE) + } + + # history_size only available for lbfgs + if (!is.null(self$history_size)) { + if (!isTRUE(self$algorithm == "lbfgs")) { + stop("'history_size' is only allowed if 'algorithm' is specified as 'lbfgs'.", call. 
= FALSE) + } else { + checkmate::assert_integerish(self$history_size, lower = 1, len = 1, null.ok = FALSE) + self$history_size <- as.integer(self$history_size) + } } invisible(TRUE) diff --git a/R/model.R b/R/model.R index 929e38cfb..4b92060cc 100644 --- a/R/model.R +++ b/R/model.R @@ -691,6 +691,7 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method) #' init = NULL, #' save_latent_dynamics = FALSE, #' output_dir = NULL, +#' sig_figs = NULL, #' chains = 4, #' parallel_chains = getOption("mc.cores", 1), #' chain_ids = seq_len(chains), @@ -710,7 +711,6 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method) #' term_buffer = NULL, #' window = NULL, #' fixed_param = FALSE, -#' sig_figs = NULL, #' validate_csv = TRUE, #' show_messages = TRUE #' ) @@ -763,6 +763,8 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method) #' although some names are slightly different. They are described briefly here #' and in greater detail in the CmdStan manual. Arguments left at `NULL` #' default to the default used by the installed version of CmdStan. +#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' will have the default values for the latest version of CmdStan. #' #' * `iter_sampling`: (positive integer) The number of post-warmup iterations to #' run per chain. 
@@ -834,6 +836,7 @@ sample_method <- function(data = NULL, init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, + sig_figs = NULL, chains = 4, parallel_chains = getOption("mc.cores", 1), chain_ids = seq_len(chains), @@ -853,7 +856,6 @@ sample_method <- function(data = NULL, term_buffer = NULL, window = NULL, fixed_param = FALSE, - sig_figs = NULL, validate_csv = TRUE, show_messages = TRUE, # deprecated @@ -952,8 +954,8 @@ sample_method <- function(data = NULL, init = init, refresh = refresh, output_dir = output_dir, - validate_csv = validate_csv, - sig_figs = sig_figs + sig_figs = sig_figs, + validate_csv = validate_csv ) cmdstan_procs <- CmdStanMCMCProcs$new( num_procs = chains, @@ -1013,6 +1015,7 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' init = NULL, #' save_latent_dynamics = FALSE, #' output_dir = NULL, +#' sig_figs = NULL, #' chains = 4, #' parallel_chains = getOption("mc.cores", 1), #' chain_ids = seq_len(chains), @@ -1031,7 +1034,6 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' term_buffer = NULL, #' window = NULL, #' fixed_param = FALSE, -#' sig_figs = NULL, #' validate_csv = TRUE, #' show_messages = TRUE #' ) @@ -1159,7 +1161,7 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method) #' constrained variables, which shifts the mode due to the change of #' variables. Thus modes correspond to modes of the model as written. 
#' -#' -- [*CmdStan Interface User's Guide*](https://github.com/stan-dev/cmdstan/releases/latest) +#' -- [*CmdStan User's Guide*](https://mc-stan.org/docs/cmdstan-guide/) #' #' @section Usage: #' ``` @@ -1170,11 +1172,17 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method) #' init = NULL, #' save_latent_dynamics = FALSE, #' output_dir = NULL, +#' sig_figs = NULL, #' threads = NULL, #' algorithm = NULL, #' init_alpha = NULL, #' iter = NULL, -#' sig_figs = NULL +#' tol_obj = NULL, +#' tol_rel_obj = NULL, +#' tol_grad = NULL, +#' tol_rel_grad = NULL, +#' tol_param = NULL, +#' history_size = NULL #' ) #' ``` #' @@ -1184,16 +1192,27 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method) #' arguments. These arguments are described briefly here and in greater detail #' in the CmdStan manual. Arguments left at `NULL` default to the default used #' by the installed version of CmdStan. +#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' will have the defaults for the latest version of CmdStan. #' #' * `threads`: (positive integer) If the model was #' [compiled][model-method-compile] with threading support, the number of #' threads to use in parallelized sections (e.g., when #' using the Stan functions `reduce_sum()` or `map_rect()`). +#' * `iter`: (positive integer) The maximum number of iterations. #' * `algorithm`: (string) The optimization algorithm. One of `"lbfgs"`, -#' `"bfgs"`, or `"newton"`. -#' * `iter`: (positive integer) The number of iterations. -#' * `init_alpha`: (nonnegative real) The line search step size for first -#' iteration. Not applicable if `algorithm="newton"`. +#' `"bfgs"`, or `"newton"`. The control parameters below are only available +#' for `"lbfgs"` and `"bfgs"`. For their default values and more details see +#' the CmdStan User's Guide. The default values can also be obtained by +#' running `cmdstanr_example(method="optimize")$metadata()`. 
+#' * `init_alpha`: (positive real) The initial step size parameter. +#' * `tol_obj`: (positive real) Convergence tolerance on changes in objective function value. +#' * `tol_rel_obj`: (positive real) Convergence tolerance on relative changes in objective function value. +#' * `tol_grad`: (positive real) Convergence tolerance on the norm of the gradient. +#' * `tol_rel_grad`: (positive real) Convergence tolerance on the relative norm of the gradient. +#' * `tol_param`: (positive real) Convergence tolerance on changes in parameter value. +#' * `history_size`: (positive integer) The size of the history used when +#' approximating the Hessian. Only available for L-BFGS. #' #' @section Value: The `$optimize()` method returns a [`CmdStanMLE`] object. #' @@ -1208,11 +1227,17 @@ optimize_method <- function(data = NULL, init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, + sig_figs = NULL, threads = NULL, algorithm = NULL, init_alpha = NULL, iter = NULL, - sig_figs = NULL) { + tol_obj = NULL, + tol_rel_obj = NULL, + tol_grad = NULL, + tol_rel_grad = NULL, + tol_param = NULL, + history_size = NULL) { checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { if (!is.null(threads)) { @@ -1231,7 +1256,13 @@ optimize_method <- function(data = NULL, optimize_args <- OptimizeArgs$new( algorithm = algorithm, init_alpha = init_alpha, - iter = iter + iter = iter, + tol_obj = tol_obj, + tol_rel_obj = tol_rel_obj, + tol_grad = tol_grad, + tol_rel_grad = tol_rel_grad, + tol_param = tol_param, + history_size = history_size ) cmdstan_args <- CmdStanArgs$new( method_args = optimize_args, @@ -1286,6 +1317,7 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method) #' init = NULL, #' save_latent_dynamics = FALSE, #' output_dir = NULL, +#' sig_figs = NULL, #' threads = NULL, #' algorithm = NULL, #' iter = NULL, @@ -1296,8 +1328,7 @@ CmdStanModel$set("public", name = "optimize", value = 
optimize_method) #' adapt_iter = NULL, #' tol_rel_obj = NULL, #' eval_elbo = NULL, -#' output_samples = NULL, -#' sig_figs = NULL +#' output_samples = NULL #' ) #' ``` #' @@ -1343,6 +1374,7 @@ variational_method <- function(data = NULL, init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, + sig_figs = NULL, threads = NULL, algorithm = NULL, iter = NULL, @@ -1353,8 +1385,7 @@ variational_method <- function(data = NULL, adapt_iter = NULL, tol_rel_obj = NULL, eval_elbo = NULL, - output_samples = NULL, - sig_figs = NULL) { + output_samples = NULL) { checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { if (!is.null(threads)) { @@ -1424,9 +1455,9 @@ CmdStanModel$set("public", name = "variational", value = variational_method) #' data = NULL, #' seed = NULL, #' output_dir = NULL, +#' sig_figs = NULL, #' parallel_chains = getOption("mc.cores", 1), -#' threads_per_chain = NULL, -#' sig_figs = NULL +#' threads_per_chain = NULL #' ) #' ``` #' @@ -1436,7 +1467,7 @@ CmdStanModel$set("public", name = "variational", value = variational_method) #' - A [posterior::draws_array] (for MCMC) or [posterior::draws_matrix] (for VB) #' object returned by CmdStanR's [`$draws()`][fit-method-draws] method. #' - A character vector of paths to CmdStan CSV output files. -#' * `data`, `seed`, `output_dir`, `parallel_chains`, `threads_per_chain`, `sig_figs`: +#' * `data`, `seed`, `output_dir`, `sig_figs`, `parallel_chains`, `threads_per_chain`: #' Same as for the [`$sample()`][model-method-sample] method. #' #' @section Value: The `$generate_quantities()` method returns a [`CmdStanGQ`] object. 
@@ -1492,9 +1523,9 @@ generate_quantities_method <- function(fitted_params, data = NULL, seed = NULL, output_dir = NULL, + sig_figs = NULL, parallel_chains = getOption("mc.cores", 1), - threads_per_chain = NULL, - sig_figs = NULL) { + threads_per_chain = NULL) { checkmate::assert_integerish(parallel_chains, lower = 1, null.ok = TRUE) checkmate::assert_integerish(threads_per_chain, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { diff --git a/man-roxygen/model-common-args.R b/man-roxygen/model-common-args.R index 7ba913593..24e90d3b1 100644 --- a/man-roxygen/model-common-args.R +++ b/man-roxygen/model-common-args.R @@ -52,7 +52,7 @@ #' methods like `$save_latent_dynamics_files()`). #' * `sig_figs`: (positive integer) The number of significant figures used #' for the output values. By default, CmdStan represent the output values with -#' 6 significant figures. The upper limit for `sig_figs` is 18. Increasing -#' this value can cause an increased usage of disk space due to larger +#' 6 significant figures. The upper limit for `sig_figs` is 18. Increasing +#' this value can cause an increased usage of disk space due to larger #' output CSV files. -#' +#' diff --git a/man/model-method-generate-quantities.Rd b/man/model-method-generate-quantities.Rd index cac349bc8..1fb59523c 100644 --- a/man/model-method-generate-quantities.Rd +++ b/man/model-method-generate-quantities.Rd @@ -15,9 +15,9 @@ based on previously fitted parameters. data = NULL, seed = NULL, output_dir = NULL, + sig_figs = NULL, parallel_chains = getOption("mc.cores", 1), - threads_per_chain = NULL, - sig_figs = NULL + threads_per_chain = NULL ) } } @@ -32,7 +32,7 @@ based on previously fitted parameters. object returned by CmdStanR's \code{\link[=fit-method-draws]{$draws()}} method. \item A character vector of paths to CmdStan CSV output files. 
} -\item \code{data}, \code{seed}, \code{output_dir}, \code{parallel_chains}, \code{threads_per_chain}, \code{sig_figs}: +\item \code{data}, \code{seed}, \code{output_dir}, \code{sig_figs}, \code{parallel_chains}, \code{threads_per_chain}: Same as for the \code{\link[=model-method-sample]{$sample()}} method. } } diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index a98131cb9..dbd82dd13 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -17,7 +17,7 @@ optimization, the mode is calculated without the Jacobian adjustment for constrained variables, which shifts the mode due to the change of variables. Thus modes correspond to modes of the model as written. --- \href{https://github.com/stan-dev/cmdstan/releases/latest}{\emph{CmdStan Interface User's Guide}} +-- \href{https://mc-stan.org/docs/cmdstan-guide/}{\emph{CmdStan User's Guide}} } \section{Usage}{ \preformatted{$optimize( @@ -27,11 +27,17 @@ variables. Thus modes correspond to modes of the model as written. init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, + sig_figs = NULL, threads = NULL, algorithm = NULL, init_alpha = NULL, iter = NULL, - sig_figs = NULL + tol_obj = NULL, + tol_rel_obj = NULL, + tol_grad = NULL, + tol_rel_grad = NULL, + tol_param = NULL, + history_size = NULL ) } } @@ -110,16 +116,27 @@ arguments above, the \verb{$optimize()} method also has its own set of arguments. These arguments are described briefly here and in greater detail in the CmdStan manual. Arguments left at \code{NULL} default to the default used by the installed version of CmdStan. +The latest \href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} +will have the defaults for the latest version of CmdStan. 
\itemize{ \item \code{threads}: (positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of threads to use in parallelized sections (e.g., when using the Stan functions \code{reduce_sum()} or \code{map_rect()}). +\item \code{iter}: (positive integer) The maximum number of iterations. \item \code{algorithm}: (string) The optimization algorithm. One of \code{"lbfgs"}, -\code{"bfgs"}, or \code{"newton"}. -\item \code{iter}: (positive integer) The number of iterations. -\item \code{init_alpha}: (nonnegative real) The line search step size for first -iteration. Not applicable if \code{algorithm="newton"}. +\code{"bfgs"}, or \code{"newton"}. The control parameters below are only available +for \code{"lbfgs"} and \code{"bfgs"}. For their default values and more details see +the CmdStan User's Guide. The default values can also be obtained by +running \code{cmdstanr_example(method="optimize")$metadata()}. +\item \code{init_alpha}: (positive real) The initial step size parameter. +\item \code{tol_obj}: (positive real) Convergence tolerance on changes in objective function value. +\item \code{tol_rel_obj}: (positive real) Convergence tolerance on relative changes in objective function value. +\item \code{tol_grad}: (positive real) Convergence tolerance on the norm of the gradient. +\item \code{tol_rel_grad}: (positive real) Convergence tolerance on the relative norm of the gradient. +\item \code{tol_param}: (positive real) Convergence tolerance on changes in parameter value. +\item \code{history_size}: (positive integer) The size of the history used when +approximating the Hessian. Only available for L-BFGS. } } diff --git a/man/model-method-sample.Rd b/man/model-method-sample.Rd index c1080887e..29b836de8 100644 --- a/man/model-method-sample.Rd +++ b/man/model-method-sample.Rd @@ -18,6 +18,7 @@ some data. 
init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, + sig_figs = NULL, chains = 4, parallel_chains = getOption("mc.cores", 1), chain_ids = seq_len(chains), @@ -37,7 +38,6 @@ some data. term_buffer = NULL, window = NULL, fixed_param = FALSE, - sig_figs = NULL, validate_csv = TRUE, show_messages = TRUE ) @@ -153,6 +153,8 @@ The rest of the arguments correspond to arguments offered by CmdStan, although some names are slightly different. They are described briefly here and in greater detail in the CmdStan manual. Arguments left at \code{NULL} default to the default used by the installed version of CmdStan. +The latest \href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} +will have the default values for the latest version of CmdStan. \itemize{ \item \code{iter_sampling}: (positive integer) The number of post-warmup iterations to run per chain. diff --git a/man/model-method-sample_mpi.Rd b/man/model-method-sample_mpi.Rd index b8b933d4c..87c26044e 100644 --- a/man/model-method-sample_mpi.Rd +++ b/man/model-method-sample_mpi.Rd @@ -46,6 +46,7 @@ of processes with \code{mpi_args = list("n" = 4)}. init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, + sig_figs = NULL, chains = 4, parallel_chains = getOption("mc.cores", 1), chain_ids = seq_len(chains), @@ -64,7 +65,6 @@ of processes with \code{mpi_args = list("n" = 4)}. term_buffer = NULL, window = NULL, fixed_param = FALSE, - sig_figs = NULL, validate_csv = TRUE, show_messages = TRUE ) diff --git a/man/model-method-variational.Rd b/man/model-method-variational.Rd index 9b7e61861..1a565d78d 100644 --- a/man/model-method-variational.Rd +++ b/man/model-method-variational.Rd @@ -26,6 +26,7 @@ matrix for the approximation. init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, + sig_figs = NULL, threads = NULL, algorithm = NULL, iter = NULL, @@ -36,8 +37,7 @@ matrix for the approximation. 
adapt_iter = NULL, tol_rel_obj = NULL, eval_elbo = NULL, - output_samples = NULL, - sig_figs = NULL + output_samples = NULL ) } } diff --git a/tests/testthat/test-model-optimize.R b/tests/testthat/test-model-optimize.R index 6b447f41d..9030ce235 100644 --- a/tests/testthat/test-model-optimize.R +++ b/tests/testthat/test-model-optimize.R @@ -74,10 +74,66 @@ test_that("optimize() method errors for any invalid argument before calling cmds } }) -test_that("optimize() errors when combining 'newton' with 'init_alpha'", { +test_that("optimize() errors with bad combination of arguments", { skip_on_cran() + + # check a few examples (if these errors are correct then they will be correct + # for all similar args because of how it's implemented) + expect_error( + mod$optimize(data = data_list, algorithm = "newton", tol_grad = 0.1), + "'tol_grad' can't be used when algorithm is 'newton'" + ) + expect_error( + mod$optimize(data = data_list, algorithm = "bfgs", tol_obj = -10), + "not >= 0" + ) + expect_error( + mod$optimize(data = data_list, init_alpha = 0.1), + "Please specify 'algorithm' in order to use 'init_alpha'" + ) + + # history size only allowed with lbfgs and must be positive integer + expect_error( + mod$optimize(data = data_list, history_size = 1), + "'history_size' is only allowed if 'algorithm' is specified as 'lbfgs'" + ) expect_error( - mod$optimize(data = data_list, algorithm = "newton", init_alpha = 0.1), - "'init_alpha' can't be used when algorithm is 'newton'" + mod$optimize(data = data_list, algorithm = "bfgs", history_size = 1), + "'history_size' is only allowed if 'algorithm' is specified as 'lbfgs'" + ) + expect_error( + mod$optimize(data = data_list, algorithm = "lbfgs", history_size = 1.5), + "Must be of type 'integerish'" + ) + expect_error( + mod$optimize(data = data_list, algorithm = "lbfgs", history_size = -1), + "not >= 1" ) }) + +test_that("optimize() works with (L-)BFGS tolerances specified", { + skip_on_cran() + expect_optim_output( + fit <- 
mod$optimize( + data = data_list, + algorithm = "lbfgs", + # using values that aren't the defaults + init_alpha = 0.002, + tol_obj = 2e-11, + tol_rel_obj = 10001, + tol_grad = 5e-07, + tol_rel_grad = 10000001, + tol_param = 5e-07, + history_size = 6 + ) + ) + metadata <- fit$metadata() + expect_equal(metadata$init_alpha, 0.002) + expect_equal(metadata$tol_obj, 2e-11) + expect_equal(metadata$tol_rel_obj, 10001) + expect_equal(metadata$tol_grad, 5e-07) + expect_equal(metadata$tol_rel_grad, 10000001) + expect_equal(metadata$tol_param, 5e-07) + expect_equal(metadata$history_size, 6) +}) +