Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Profiling #434

Merged
merged 32 commits into from
Jan 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
df8fe14
add profiling
rok-cesnovar Jan 19, 2021
50f560b
Merge branch 'fix_2_26' into profiling
rok-cesnovar Jan 19, 2021
e85a56d
add basic profiling return
rok-cesnovar Jan 19, 2021
0066f00
Merge branch 'master' into profiling
rok-cesnovar Jan 19, 2021
5893ff2
return data frames
rok-cesnovar Jan 19, 2021
e0353be
add save_profile_files
rok-cesnovar Jan 20, 2021
846b1f7
add missing docs
rok-cesnovar Jan 20, 2021
386356d
add rd
rok-cesnovar Jan 20, 2021
7dc229e
fix arg
rok-cesnovar Jan 20, 2021
08d6586
fix docs
rok-cesnovar Jan 20, 2021
e4d5f04
fix profile_file arg
rok-cesnovar Jan 20, 2021
4e1100e
fix finalize
rok-cesnovar Jan 20, 2021
011dbc5
reorganize cmdstan version warning
rok-cesnovar Jan 20, 2021
30018eb
add test for profiling features
rok-cesnovar Jan 20, 2021
087cfbd
fix case when the files do not exist
rok-cesnovar Jan 21, 2021
46f9539
update tests
rok-cesnovar Jan 21, 2021
abf7791
fix finalizing
rok-cesnovar Jan 21, 2021
aaba318
remove separte datafile
rok-cesnovar Jan 21, 2021
ce99662
cleanup tests
rok-cesnovar Jan 21, 2021
3632e52
fix test
rok-cesnovar Jan 21, 2021
b4c23e5
doc edits
jgabry Jan 21, 2021
c067577
Merge branch 'profiling' of https://github.com/stan-dev/cmdstanr into…
jgabry Jan 21, 2021
43a3487
fix tests
rok-cesnovar Jan 21, 2021
f4e2ba8
cleanup code
rok-cesnovar Jan 21, 2021
5457295
add example
rok-cesnovar Jan 21, 2021
eecb8a9
fix profile_file NA bug
rok-cesnovar Jan 21, 2021
f345f70
Update fit-method-profiles.Rd
jgabry Jan 21, 2021
7a211c0
remove file.exists
rok-cesnovar Jan 21, 2021
0661c16
apply Jonah's suggestions
rok-cesnovar Jan 21, 2021
e7bd924
fix syntax
rok-cesnovar Jan 21, 2021
456a0a1
check if length 0 or if files don't exist
jgabry Jan 21, 2021
e6ae1ad
skip failing tests if using < 2.26
jgabry Jan 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ files. (#414)

* Faster CSV reading for multiple chains. (#419)

* New `profiles()` method for fitted model objects gives access to profiling
information from R if profiling is used in the Stan program. Support for profiling
Stan programs requires CmdStan >= 2.26. (#434)


# cmdstanr 0.3.0

Expand Down
20 changes: 16 additions & 4 deletions R/args.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,13 @@ CmdStanArgs <- R6::R6Class(
invisible(self)
},

new_file_names = function(type = c("output", "diagnostic")) {
new_file_names = function(type = c("output", "diagnostic", "profile")) {
basename <- self$model_name
type <- match.arg(type)
if (type == "diagnostic") {
basename <- paste0(basename, "-diagnostic")
} else if (type == "profile") {
basename <- paste0(basename, "-profile")
}
generate_file_names( # defined in utils.R
basename = basename,
Expand All @@ -89,9 +91,8 @@ CmdStanArgs <- R6::R6Class(
random = TRUE
)
},
new_files = function(type = c("output", "diagnostic")) {
new_files = function(type = c("output", "diagnostic", "profile")) {
files <- file.path(self$output_dir, self$new_file_names(type))
invisible(file.create(files))
files
},

Expand All @@ -101,12 +102,14 @@ CmdStanArgs <- R6::R6Class(
#' @param idx The run id. For MCMC this is the chain id, for optimization
#' this is just 1.
#' @param output_file File path to csv file where output will be written.
#' @param profile_file File path to csv file where profile data will be written.
#' @param latent_dynamics_file File path to csv file where the extra latent
#' dynamics information will be written.
#' @return Character vector of arguments of the form "name=value".
#'
compose_all_args = function(idx = NULL,
output_file = NULL,
profile_file = NULL,
latent_dynamics_file = NULL) {
args <- list()
idx <- idx %||% 1
Expand Down Expand Up @@ -143,6 +146,10 @@ CmdStanArgs <- R6::R6Class(
args$output <- c(args$output, paste0("sig_figs=", self$sig_figs))
}

if (!is.null(profile_file)) {
args$output <- c(args$output, paste0("profile_file=", profile_file))
}

args <- do.call(c, append(args, list(use.names = FALSE)))
self$method_args$compose(idx, args)
},
Expand Down Expand Up @@ -803,7 +810,12 @@ validate_seed <- function(seed, num_procs) {
if (is.null(seed)) {
return(invisible(TRUE))
}
checkmate::assert_integerish(seed, lower = 1)
if (cmdstan_version() < "2.26") {
lower_seed <- 1
} else {
lower_seed <- 0
}
checkmate::assert_integerish(seed, lower = lower_seed)
if (length(seed) > 1 && length(seed) != num_procs) {
stop("If 'seed' is specified it must be a single integer or one per chain.",
call. = FALSE)
Expand Down
1 change: 1 addition & 0 deletions R/csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ unavailable_methods_CmdStanFit_CSV <- c(
"output",
"return_codes",
"num_procs",
"save_profile_files", "profile_files", "profiles",
"time" # available for MCMC not others
)
error_unavailable_CmdStanFit_CSV <- function(...) {
Expand Down
86 changes: 81 additions & 5 deletions R/fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -400,10 +400,10 @@ CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose)
#' Save output and data files
#'
#' @name fit-method-save_output_files
#' @aliases fit-method-save_data_file fit-method-save_latent_dynamics_files
#' fit-method-output_files fit-method-data_file fit-method-latent_dynamics_files
#' save_output_files save_data_file save_latent_dynamics_files
#' output_files data_file latent_dynamics_files
#' @aliases fit-method-save_data_file fit-method-save_latent_dynamics_files fit-method-save_profile_files
#' fit-method-output_files fit-method-data_file fit-method-latent_dynamics_files fit-method-profile_files
#' save_output_files save_data_file save_latent_dynamics_files save_profile_files
#' output_files data_file latent_dynamics_files profile_files
#'
#' @description All fitted model objects have methods for saving (moving to a
#' specified location) the files created by CmdStanR to hold CmdStan output
Expand Down Expand Up @@ -434,6 +434,10 @@ CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose)
#' `$save_output_files()` except `"-diagnostic-"` is included in the new
#' file name after `basename`.
#'
#' For `$save_profile_files()` everything is the same as for
#' `$save_output_files()` except `"-profile-"` is included in the new
#' file name after `basename`.
#'
#' For `$save_data_file()` no `id` is included in the file name because even
#' with multiple MCMC chains the data file is the same.
#'
Expand Down Expand Up @@ -478,6 +482,15 @@ save_latent_dynamics_files <- function(dir = ".",
}
CmdStanFit$set("public", name = "save_latent_dynamics_files", value = save_latent_dynamics_files)

#' @rdname fit-method-save_output_files
save_profile_files <- function(dir = ".",
                               basename = NULL,
                               timestamp = TRUE,
                               random = TRUE) {
  # Thin public wrapper: all work is delegated to the run set's method of the
  # same name, with every argument forwarded unchanged.
  self$runset$save_profile_files(
    dir = dir,
    basename = basename,
    timestamp = timestamp,
    random = random
  )
}
CmdStanFit$set("public", name = "save_profile_files", value = save_profile_files)

#' @rdname fit-method-save_output_files
save_data_file <- function(dir = ".",
basename = NULL,
Expand All @@ -496,6 +509,12 @@ output_files <- function(include_failed = FALSE) {
}
CmdStanFit$set("public", name = "output_files", value = output_files)

#' @rdname fit-method-save_output_files
profile_files <- function(include_failed = FALSE) {
  # Delegates to the run set; `include_failed` is forwarded as-is.
  self$runset$profile_files(include_failed = include_failed)
}
CmdStanFit$set("public", name = "profile_files", value = profile_files)

#' @rdname fit-method-save_output_files
latent_dynamics_files <- function(include_failed = FALSE) {
self$runset$latent_dynamics_files(include_failed)
Expand Down Expand Up @@ -641,6 +660,63 @@ return_codes <- function() {
}
CmdStanFit$set("public", name = "return_codes", value = return_codes)

#' Return profiling data
#'
#' @name fit-method-profiles
#' @aliases profiles
#' @description The `$profiles()` method returns a list of data frames with
#' profiling data if any profiling data was written to the profile CSV files.
#' See [save_profile_files()] to control where the files are saved.
#'
#' Support for profiling Stan programs is available with CmdStan >= 2.26 and
#' requires adding profiling statements to the Stan program.
#'
#' @return A list of data frames with profiling data if the profiling CSV files
#' were created.
#'
#' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`]
#' @examples
#' \dontrun{
#' # first fit a model using MCMC
#' mcmc_program <- write_stan_file(
#' "data {
#' int<lower=0> N;
#' int<lower=0,upper=1> y[N];
#' }
#' parameters {
#' real<lower=0,upper=1> theta;
#' }
#' model {
#' profile("likelihood") {
#' y ~ bernoulli(theta);
#' }
#' }
#' generated quantities {
#' int y_rep[N];
#' profile("gq") {
#' y_rep = bernoulli_rng(rep_vector(theta, N));
#' }
#' }
#' "
#' )
#' mod_mcmc <- cmdstan_model(mcmc_program)
#'
jgabry marked this conversation as resolved.
Show resolved Hide resolved
#' data <- list(N = 10, y = c(1,1,0,0,0,1,0,1,0,0))
#' fit <- mod_mcmc$sample(data = data, seed = 123, refresh = 0)
#'
#' fit$profiles()
#' }
#'
profiles <- function() {
  # Read each profile CSV into a plain data.frame (not a data.table), keeping
  # the order in which `$profile_files()` reports the paths. `unname()` keeps
  # the result an unnamed list even if the path vector carries names.
  csv_paths <- unname(self$profile_files())
  lapply(
    csv_paths,
    function(csv_path) data.table::fread(csv_path, data.table = FALSE)
  )
}
CmdStanFit$set("public", name = "profiles", value = profiles)

# CmdStanMCMC -------------------------------------------------------------
#' CmdStanMCMC objects
Expand Down Expand Up @@ -786,7 +862,7 @@ CmdStanMCMC <- R6::R6Class(
csv_contents$post_warmup_draws[,,missing_variables],
along="variable"
)
}
}
}
if (!is.null(csv_contents$post_warmup_sampler_diagnostics)) {
if (is.null(private$sampler_diagnostics_)) {
Expand Down
61 changes: 56 additions & 5 deletions R/run.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ CmdStanRun <- R6::R6Class(
self$args <- args
self$procs <- procs
private$output_files_ <- self$new_output_files()
if (cmdstan_version() >= "2.26.0") {
private$profile_files_ <- self$new_profile_files()
}
if (self$args$save_latent_dynamics) {
private$latent_dynamics_files_ <- self$new_latent_dynamics_files()
}
Expand All @@ -40,6 +43,9 @@ CmdStanRun <- R6::R6Class(
new_latent_dynamics_files = function() {
self$args$new_files(type = "diagnostic")
},
# Ask the args object for a fresh set of file paths of type "profile" and
# return whatever `new_files()` returns.
new_profile_files = function() {
self$args$new_files(type = "profile")
},
latent_dynamics_files = function(include_failed = FALSE) {
if (!length(private$latent_dynamics_files_)) {
stop(
Expand All @@ -63,7 +69,22 @@ CmdStanRun <- R6::R6Class(
private$output_files_[ok]
}
},

profile_files = function(include_failed = FALSE) {
  # Returns the stored profile CSV paths. Errors if no paths are stored or if
  # none of the stored paths exists on disk.
  paths <- private$profile_files_
  found_any <- length(paths) > 0 && any(file.exists(paths))
  if (!found_any) {
    stop(
      "No profile files found. ",
      "The model that produced the fit did not use any profiling.",
      call. = FALSE
    )
  }
  if (include_failed) {
    return(paths)
  }
  # Keep only the paths whose process is finished or still queued.
  keep <- self$procs$is_finished() | self$procs$is_queued()
  paths[keep]
},
save_output_files = function(dir = ".",
basename = NULL,
timestamp = TRUE,
Expand All @@ -90,9 +111,9 @@ CmdStanRun <- R6::R6Class(
invisible(new_paths)
},
save_latent_dynamics_files = function(dir = ".",
basename = NULL,
timestamp = TRUE,
random = TRUE) {
basename = NULL,
timestamp = TRUE,
random = TRUE) {
current_files <- self$latent_dynamics_files(include_failed = TRUE) # used so we get error if 0 files
new_paths <- copy_temp_files(
current_paths = current_files,
Expand All @@ -114,6 +135,31 @@ CmdStanRun <- R6::R6Class(
private$latent_dynamics_files_saved_ <- TRUE
invisible(new_paths)
},
save_profile_files = function(dir = ".",
                              basename = NULL,
                              timestamp = TRUE,
                              random = TRUE) {
  # include_failed = TRUE so that `profile_files()` raises its error when
  # there are no profile files to save.
  old_paths <- self$profile_files(include_failed = TRUE)
  target_basename <- paste0(basename %||% self$model_name(), "-profile")
  moved_paths <- copy_temp_files(
    current_paths = old_paths,
    new_dir = dir,
    new_basename = target_basename,
    ids = self$proc_ids(),
    ext = ".csv",
    timestamp = timestamp,
    random = random
  )

  # Remove the originals, but never a path that is also one of the new paths.
  is_stale <- !(old_paths %in% moved_paths)
  file.remove(old_paths[is_stale])

  # Point the run at the new locations and tell the user where they are.
  private$profile_files_ <- moved_paths
  message(
    "Moved ",
    length(old_paths),
    " files and set internal paths to new locations:\n",
    paste("-", moved_paths, collapse = "\n")
  )
  private$profile_files_saved_ <- TRUE
  invisible(moved_paths)
},
save_data_file = function(dir = ".",
basename = NULL,
timestamp = TRUE,
Expand Down Expand Up @@ -144,6 +190,7 @@ CmdStanRun <- R6::R6Class(
self$args$compose_all_args(
idx = j,
output_file = private$output_files_[j],
profile_file = private$profile_files_[j],
latent_dynamics_file = private$latent_dynamics_files_[j] # maybe NULL
)
})
Expand Down Expand Up @@ -212,9 +259,11 @@ CmdStanRun <- R6::R6Class(
),
private = list(
output_files_ = character(),
profile_files_ = NULL,
output_files_saved_ = FALSE,
latent_dynamics_files_ = NULL,
latent_dynamics_files_saved_ = FALSE,
profile_files_saved_ = FALSE,
command_args_ = list(),

finalize = function() {
Expand All @@ -223,7 +272,9 @@ CmdStanRun <- R6::R6Class(
if (!private$output_files_saved_)
self$output_files(include_failed = TRUE),
if (self$args$save_latent_dynamics && !private$latent_dynamics_files_saved_)
self$latent_dynamics_files(include_failed = TRUE)
self$latent_dynamics_files(include_failed = TRUE),
if (cmdstan_version() > "2.25.0" && !private$profile_files_saved_)
private$profile_files_
)
unlink(temp_files)
}
Expand Down
57 changes: 57 additions & 0 deletions man/fit-method-profiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading