Merge pull request #398 from stan-dev/optimize-tol-args

add tolerance args for (l)bfgs
stan-dev · Dec 12, 2020 · d0c0f77 · d0c0f77
2 parents 5a02481 + 1e0cb7f
commit d0c0f77
Show file tree

Hide file tree

Showing 10 changed files with 197 additions and 51 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -15,13 +15,16 @@ the CSV. (#394)
 troubleshooting, debugging and development. See end of *How does CmdStanR work?*
 vignette for details. (#392)
 
-* New `loo()` method for CmdStanMCMC objects. Requires computing pointwise
+* New `$loo()` method for CmdStanMCMC objects. Requires computing pointwise
 log-likelihood in Stan program. (#366)
 
 * The `fitted_params` argument to the `$generate_quantities()` method now also
 accepts CmdStanVB, `posterior::draws_array`, and `posterior::draws_matrix`
 objects. (#390)
 
+* The `$optimize()` method now supports all of CmdStan's tolerance-related
+arguments for (L)BFGS. (#398)
+
 # cmdstanr 0.2.2
 
 ### Bug fixes

diff --git a/R/args.R b/R/args.R
@@ -343,12 +343,24 @@ OptimizeArgs <- R6::R6Class(
   lock_objects = FALSE,
   public = list(
     method = "optimize",
-    initialize = function(algorithm = NULL,
+    initialize = function(iter = NULL,
+                          algorithm = NULL,
                           init_alpha = NULL,
-                          iter = NULL) {
+                          tol_obj = NULL,
+                          tol_rel_obj = NULL,
+                          tol_grad = NULL,
+                          tol_rel_grad = NULL,
+                          tol_param = NULL,
+                          history_size = NULL) {
       self$algorithm <- algorithm
-      self$init_alpha <- init_alpha
       self$iter <- iter
+      self$init_alpha <- init_alpha
+      self$tol_obj <- tol_obj
+      self$tol_rel_obj <- tol_rel_obj
+      self$tol_grad <- tol_grad
+      self$tol_rel_grad <- tol_rel_grad
+      self$tol_param <- tol_param
+      self$history_size <- history_size
       invisible(self)
     },
     validate = function(num_procs) {
@@ -365,9 +377,15 @@ OptimizeArgs <- R6::R6Class(
       }
       new_args <- list(
         "method=optimize",
+        .make_arg("iter"),
         .make_arg("algorithm"),
         .make_arg("init_alpha"),
-        .make_arg("iter")
+        .make_arg("tol_obj"),
+        .make_arg("tol_rel_obj"),
+        .make_arg("tol_grad"),
+        .make_arg("tol_rel_grad"),
+        .make_arg("tol_param"),
+        .make_arg("history_size")
       )
       new_args <- do.call(c, new_args)
       c(args, new_args)
@@ -577,14 +595,33 @@ validate_sample_args <- function(self, num_procs) {
 validate_optimize_args <- function(self) {
   checkmate::assert_subset(self$algorithm, empty.ok = TRUE,
                            choices = c("bfgs", "lbfgs", "newton"))
-  checkmate::assert_integerish(self$iter, lower = 0, null.ok = TRUE, len = 1)
+  checkmate::assert_integerish(self$iter, lower = 1, null.ok = TRUE, len = 1)
   if (!is.null(self$iter)) {
     self$iter <- as.integer(self$iter)
   }
-  checkmate::assert_number(self$init_alpha, lower = 0, null.ok = TRUE)
-  if (!is.null(self$init_alpha) && isTRUE(self$algorithm == "newton")) {
-    stop("'init_alpha' can't be used when algorithm is 'newton'.",
-         call. = FALSE)
+
+  # check args only available for lbfgs and bfgs
+  bfgs_args <- c("init_alpha", "tol_obj", "tol_rel_obj", "tol_grad", "tol_rel_grad", "tol_param")
+  for (arg in bfgs_args) {
+    # check that algorithm='lbfgs' or 'bfgs' is explicitly specified (error if
+    # not or if 'newton') and that arg is a nonnegative number or NULL
+    if (!is.null(self[[arg]]) && is.null(self$algorithm)) {
+      stop("Please specify 'algorithm' in order to use '", arg, "'.", call. = FALSE)
+    }
+    if (!is.null(self[[arg]]) && isTRUE(self$algorithm == "newton")) {
+      stop("'", arg, "' can't be used when algorithm is 'newton'.", call. = FALSE)
+    }
+    checkmate::assert_number(self[[arg]], .var.name = arg, lower = 0, null.ok = TRUE)
+  }
+
+  # history_size only available for lbfgs
+  if (!is.null(self$history_size)) {
+    if (!isTRUE(self$algorithm == "lbfgs")) {
+      stop("'history_size' is only allowed if 'algorithm' is specified as 'lbfgs'.", call. = FALSE)
+    } else {
+      checkmate::assert_integerish(self$history_size, lower = 1, len = 1, null.ok = FALSE)
+      self$history_size <- as.integer(self$history_size)
+    }
   }
 
   invisible(TRUE)

diff --git a/R/model.R b/R/model.R
@@ -691,6 +691,7 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method)
 #'     init = NULL,
 #'     save_latent_dynamics = FALSE,
 #'     output_dir = NULL,
+#'     sig_figs = NULL,
 #'     chains = 4,
 #'     parallel_chains = getOption("mc.cores", 1),
 #'     chain_ids = seq_len(chains),
@@ -710,7 +711,6 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method)
 #'     term_buffer = NULL,
 #'     window = NULL,
 #'     fixed_param = FALSE,
-#'     sig_figs = NULL,
 #'     validate_csv = TRUE,
 #'     show_messages = TRUE
 #'   )
@@ -763,6 +763,8 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method)
 #'   although some names are slightly different. They are described briefly here
 #'   and in greater detail in the CmdStan manual. Arguments left at `NULL`
 #'   default to the default used by the installed version of CmdStan.
+#'   The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/)
+#'   will have the default values for the latest version of CmdStan.
 #'
 #'   * `iter_sampling`: (positive integer) The number of post-warmup iterations to
 #'   run per chain.
@@ -834,6 +836,7 @@ sample_method <- function(data = NULL,
                           init = NULL,
                           save_latent_dynamics = FALSE,
                           output_dir = NULL,
+                          sig_figs = NULL,
                           chains = 4,
                           parallel_chains = getOption("mc.cores", 1),
                           chain_ids = seq_len(chains),
@@ -853,7 +856,6 @@ sample_method <- function(data = NULL,
                           term_buffer = NULL,
                           window = NULL,
                           fixed_param = FALSE,
-                          sig_figs = NULL,
                           validate_csv = TRUE,
                           show_messages = TRUE,
                           # deprecated
@@ -952,8 +954,8 @@ sample_method <- function(data = NULL,
     init = init,
     refresh = refresh,
     output_dir = output_dir,
-    validate_csv = validate_csv,
-    sig_figs = sig_figs
+    sig_figs = sig_figs,
+    validate_csv = validate_csv
   )
   cmdstan_procs <- CmdStanMCMCProcs$new(
     num_procs = chains,
@@ -1013,6 +1015,7 @@ CmdStanModel$set("public", name = "sample", value = sample_method)
 #'     init = NULL,
 #'     save_latent_dynamics = FALSE,
 #'     output_dir = NULL,
+#'     sig_figs = NULL,
 #'     chains = 4,
 #'     parallel_chains = getOption("mc.cores", 1),
 #'     chain_ids = seq_len(chains),
@@ -1031,7 +1034,6 @@ CmdStanModel$set("public", name = "sample", value = sample_method)
 #'     term_buffer = NULL,
 #'     window = NULL,
 #'     fixed_param = FALSE,
-#'     sig_figs = NULL,
 #'     validate_csv = TRUE,
 #'     show_messages = TRUE
 #'   )
@@ -1159,7 +1161,7 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method)
 #'   constrained variables, which shifts the mode due to the change of
 #'   variables. Thus modes correspond to modes of the model as written.
 #'
-#'   -- [*CmdStan Interface User's Guide*](https://github.com/stan-dev/cmdstan/releases/latest)
+#'   -- [*CmdStan User's Guide*](https://mc-stan.org/docs/cmdstan-guide/)
 #'
 #' @section Usage:
 #'   ```
@@ -1170,11 +1172,17 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method)
 #'     init = NULL,
 #'     save_latent_dynamics = FALSE,
 #'     output_dir = NULL,
+#'     sig_figs = NULL,
 #'     threads = NULL,
 #'     algorithm = NULL,
 #'     init_alpha = NULL,
 #'     iter = NULL,
-#'     sig_figs = NULL
+#'     tol_obj = NULL,
+#'     tol_rel_obj = NULL,
+#'     tol_grad = NULL,
+#'     tol_rel_grad = NULL,
+#'     tol_param = NULL,
+#'     history_size = NULL
 #'   )
 #'   ```
 #'
@@ -1184,16 +1192,27 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method)
 #'   arguments. These arguments are described briefly here and in greater detail
 #'   in the CmdStan manual. Arguments left at `NULL` default to the default used
 #'   by the installed version of CmdStan.
+#'   The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/)
+#'   will have the defaults for the latest version of CmdStan.
 #'
 #'   * `threads`: (positive integer) If the model was
 #'   [compiled][model-method-compile] with threading support, the number of
 #'   threads to use in parallelized sections (e.g., when
 #'   using the Stan functions `reduce_sum()` or `map_rect()`).
+#'   * `iter`: (positive integer) The maximum number of iterations.
 #'   * `algorithm`: (string) The optimization algorithm. One of `"lbfgs"`,
-#'   `"bfgs"`, or `"newton"`.
-#'   * `iter`: (positive integer) The number of iterations.
-#'   * `init_alpha`: (nonnegative real) The line search step size for first
-#'   iteration. Not applicable if `algorithm="newton"`.
+#'   `"bfgs"`, or `"newton"`. The control parameters below are only available
+#'   for `"lbfgs"` and `"bfgs"`. For their default values and more details see
+#'   the CmdStan User's Guide. The default values can also be obtained by
+#'   running `cmdstanr_example(method="optimize")$metadata()`.
+#'   * `init_alpha`: (positive real) The initial step size parameter.
+#'   * `tol_obj`: (positive real) Convergence tolerance on changes in objective function value.
+#'   * `tol_rel_obj`: (positive real) Convergence tolerance on relative changes in objective function value.
+#'   * `tol_grad`: (positive real) Convergence tolerance on the norm of the gradient.
+#'   * `tol_rel_grad`: (positive real) Convergence tolerance on the relative norm of the gradient.
+#'   * `tol_param`: (positive real) Convergence tolerance on changes in parameter value.
+#'   * `history_size`: (positive integer) The size of the history used when
+#'   approximating the Hessian. Only available for L-BFGS.
 #'
 #' @section Value: The `$optimize()` method returns a [`CmdStanMLE`] object.
 #'
@@ -1208,11 +1227,17 @@ optimize_method <- function(data = NULL,
                             init = NULL,
                             save_latent_dynamics = FALSE,
                             output_dir = NULL,
+                            sig_figs = NULL,
                             threads = NULL,
                             algorithm = NULL,
                             init_alpha = NULL,
                             iter = NULL,
-                            sig_figs = NULL) {
+                            tol_obj = NULL,
+                            tol_rel_obj = NULL,
+                            tol_grad = NULL,
+                            tol_rel_grad = NULL,
+                            tol_param = NULL,
+                            history_size = NULL) {
   checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE)
   if (is.null(self$cpp_options()[["stan_threads"]])) {
     if (!is.null(threads)) {
@@ -1231,7 +1256,13 @@ optimize_method <- function(data = NULL,
   optimize_args <- OptimizeArgs$new(
     algorithm = algorithm,
     init_alpha = init_alpha,
-    iter = iter
+    iter = iter,
+    tol_obj = tol_obj,
+    tol_rel_obj = tol_rel_obj,
+    tol_grad = tol_grad,
+    tol_rel_grad = tol_rel_grad,
+    tol_param = tol_param,
+    history_size = history_size
   )
   cmdstan_args <- CmdStanArgs$new(
     method_args = optimize_args,
@@ -1286,6 +1317,7 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method)
 #'     init = NULL,
 #'     save_latent_dynamics = FALSE,
 #'     output_dir = NULL,
+#'     sig_figs = NULL,
 #'     threads = NULL,
 #'     algorithm = NULL,
 #'     iter = NULL,
@@ -1296,8 +1328,7 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method)
 #'     adapt_iter = NULL,
 #'     tol_rel_obj = NULL,
 #'     eval_elbo = NULL,
-#'     output_samples = NULL,
-#'     sig_figs = NULL
+#'     output_samples = NULL
 #'   )
 #'   ```
 #'
@@ -1343,6 +1374,7 @@ variational_method <- function(data = NULL,
                                init = NULL,
                                save_latent_dynamics = FALSE,
                                output_dir = NULL,
+                               sig_figs = NULL,
                                threads = NULL,
                                algorithm = NULL,
                                iter = NULL,
@@ -1353,8 +1385,7 @@ variational_method <- function(data = NULL,
                                adapt_iter = NULL,
                                tol_rel_obj = NULL,
                                eval_elbo = NULL,
-                               output_samples = NULL,
-                               sig_figs = NULL) {
+                               output_samples = NULL) {
   checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE)
   if (is.null(self$cpp_options()[["stan_threads"]])) {
     if (!is.null(threads)) {
@@ -1424,9 +1455,9 @@ CmdStanModel$set("public", name = "variational", value = variational_method)
 #'     data = NULL,
 #'     seed = NULL,
 #'     output_dir = NULL,
+#'     sig_figs = NULL,
 #'     parallel_chains = getOption("mc.cores", 1),
-#'     threads_per_chain = NULL,
-#'     sig_figs = NULL
+#'     threads_per_chain = NULL
 #'   )
 #'   ```
 #'
@@ -1436,7 +1467,7 @@ CmdStanModel$set("public", name = "variational", value = variational_method)
 #'     - A [posterior::draws_array] (for MCMC) or [posterior::draws_matrix] (for VB)
 #'       object returned by CmdStanR's [`$draws()`][fit-method-draws] method.
 #'     - A character vector of paths to CmdStan CSV output files.
-#'   * `data`, `seed`, `output_dir`, `parallel_chains`, `threads_per_chain`, `sig_figs`:
+#'   * `data`, `seed`, `output_dir`, `sig_figs`, `parallel_chains`, `threads_per_chain`:
 #'   Same as for the [`$sample()`][model-method-sample] method.
 #'
 #' @section Value: The `$generate_quantities()` method returns a [`CmdStanGQ`] object.
@@ -1492,9 +1523,9 @@ generate_quantities_method <- function(fitted_params,
                                        data = NULL,
                                        seed = NULL,
                                        output_dir = NULL,
+                                       sig_figs = NULL,
                                        parallel_chains = getOption("mc.cores", 1),
-                                       threads_per_chain = NULL,
-                                       sig_figs = NULL) {
+                                       threads_per_chain = NULL) {
   checkmate::assert_integerish(parallel_chains, lower = 1, null.ok = TRUE)
   checkmate::assert_integerish(threads_per_chain, lower = 1, len = 1, null.ok = TRUE)
   if (is.null(self$cpp_options()[["stan_threads"]])) {

diff --git a/man-roxygen/model-common-args.R b/man-roxygen/model-common-args.R
@@ -52,7 +52,7 @@
 #'       methods like `$save_latent_dynamics_files()`).
 #'   * `sig_figs`: (positive integer) The number of significant figures used
 #'   for the output values. By default, CmdStan represents the output values with
-#'   6 significant figures. The upper limit for `sig_figs` is 18. Increasing 
-#'   this value can cause an increased usage of disk space due to larger 
+#'   6 significant figures. The upper limit for `sig_figs` is 18. Increasing
+#'   this value can cause an increased usage of disk space due to larger
 #'   output CSV files.
-#'   
+#'
diff --git a/man/model-method-generate-quantities.Rd b/man/model-method-generate-quantities.Rd