From bf09d5ba7bbb48b93acfda113ad6f27d7f3d98d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Thu, 5 Jan 2023 07:12:45 +0100 Subject: [PATCH] Develop (#271) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added `plot.SMCMallows()` method + example (#114) * Changed class of SMC outputs to SMCMallows (#114) This makes proper dispatching of possible, withou having any other apparent consequences. * Adapted SMC vignette and unit tests to new `plot()` method (#114) * Fixed CodeFactor issue Redundant blank line at the end of a code block should be deleted. * Gathererd SMC plot functions on same file (#114) This should help with future DRYing, perhaps both subfunctions should eventually be internalized? * Increment version number to 1.2.0.9004 * Updated NEWS.md * Updated build CI config file Using template from https://github.com/r-lib/actions/tree/v2-branch/examples#standard-ci-workflow * Added `plot.SMCMallows()` method (#263) * Added `plot.SMCMallows()` method + example (#114) * Changed class of SMC outputs to SMCMallows (#114) This makes proper dispatching of possible, withou having any other apparent consequences. * Adapted SMC vignette and unit tests to new `plot()` method (#114) * Fixed CodeFactor issue Redundant blank line at the end of a code block should be deleted. * Gathererd SMC plot functions on same file (#114) This should help with future DRYing, perhaps both subfunctions should eventually be internalized? * Increment version number to 1.2.0.9004 * Updated NEWS.md * Updated docs * Removed duplicated function As mentioned in the in-code comment, `scalefun()` was already defined in a different source file. * Increment version number to 1.2.1.9001 Co-authored-by: Øystein Sørensen * Fix documentation for `plot.SMCMallows()` (#266) * Fixed links to other package functions Links were formatted wrongly assuming the package was setup to support markdown-formatted links. This fixes it. * Increment version number to 1.2.1.9002 * Reverting changes to the `compute_mallows()` docs * Reverting changes to NEWS.md Textual change is automatically introduced by `usethis::use_dev_version()`. * Deprecated `plot_*_posterior()` (#267) Following the conversation started [here](https://github.com/ocbe-uio/BayesMallows/pull/263#discussion_r1032828173), this commit moves the superseded subfunctions of `plot.SMCMallows()` into the `smc_mallows_deprecated.R` file for eventual removal. The subfunctions themselves were renamed and test units to test the deprecation warnings were written. * Matching SMC defaults to their MCMC counterparts (#269) * Updated documentation Some text were taken from the original implementation and make no longer sense. * Matched SMC defaults with their original counterparts (#114) * SMC metric defaults to footrule (#114) * SMC leap_size defaults to 1 (#114) * SMC alpha_prop_sd defaults to 0.5 (#114) * SMC alpha_max defaults to 1e6 (#114) * SMC lambda defautls to 0.1 (#114) * Update DESCRIPTION Incremented development version. Co-authored-by: Øystein Sørensen * Added changes from PR #269 to NEWS.md Those are important to mention, since they technically break backwards compatibility. I forgot to do that on he PR itself, thankfully there's a second chance. :) Co-authored-by: Waldir Leoncio --- .github/workflows/R-CMD-check.yaml | 68 +++--------- DESCRIPTION | 4 +- NAMESPACE | 1 + NEWS.md | 7 +- R/RcppExports.R | 42 +++---- R/plot.SMCMallows.R | 103 ++++++++++++++++++ R/smc_mallows_deprecated.R | 37 +++++-- R/smc_post_processing_functions.R | 97 +---------------- .../metropolis_hastings_alpha_example.R | 16 +-- inst/examples/plot.SMCMallows_example.R | 25 +++++ man/calculate_backward_probability.Rd | 2 +- man/calculate_forward_probability.Rd | 2 +- man/compute_posterior_intervals_alpha.Rd | 3 +- man/compute_posterior_intervals_rho.Rd | 3 +- man/compute_rho_consensus.Rd | 5 +- man/correction_kernel_pseudo.Rd | 2 +- man/get_exponent_sum.Rd | 2 +- man/get_sample_probabilities.Rd | 8 +- man/leap_and_shift_probs.Rd | 12 +- man/metropolis_hastings_alpha.Rd | 40 +++---- man/metropolis_hastings_aug_ranking.Rd | 8 +- man/metropolis_hastings_rho.Rd | 13 ++- man/plot.SMCMallows.Rd | 96 ++++++++++++++++ man/plot_alpha_posterior.Rd | 23 ---- man/plot_rho_posterior.Rd | 27 ----- man/smc_mallows_new_item_rank.Rd | 28 ++--- man/smc_mallows_new_users.Rd | 28 ++--- src/RcppExports.cpp | 62 +++++------ src/smc.h | 8 +- src/smc_calculate_probability.cpp | 14 +-- src/smc_correction_kernel_pseudo.cpp | 4 +- src/smc_get_exponent_sum.cpp | 2 +- src/smc_get_sample_probabilities.cpp | 4 +- src/smc_leap_and_shift_probs.cpp | 8 +- src/smc_mallows_new_item_rank.cpp | 61 ++++++----- src/smc_mallows_new_users.cpp | 38 +++---- src/smc_mallows_new_users.h | 9 +- src/smc_mallows_new_users_funs.cpp | 8 +- src/smc_metropolis_hastings_alpha.cpp | 8 +- src/smc_metropolis_hastings_aug_ranking.cpp | 4 +- src/smc_metropolis_hastings_rho.cpp | 4 +- tests/testthat/test-bulletproofing.R | 32 +++--- .../testthat/test-smc_individual_functions.R | 17 ++- .../testthat/test-smc_mallows_new_item_rank.R | 21 ++-- .../test-smc_mallows_partial_rankings.R | 22 ++-- tests/testthat/test-smc_pseudolikelihood.R | 10 +- .../testthat/test-smc_updated_new_item_rank.R | 8 +- vignettes/SMC-Mallows.Rmd | 44 ++------ 48 files changed, 573 insertions(+), 517 deletions(-) create mode 100644 R/plot.SMCMallows.R create mode 100644 inst/examples/plot.SMCMallows_example.R create mode 100644 man/plot.SMCMallows.Rd delete mode 100644 man/plot_alpha_posterior.Rd delete mode 100644 man/plot_rho_posterior.Rd diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 96658133..23b7e09f 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -7,71 +7,31 @@ name: R-CMD-check jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - strategy: fail-fast: false matrix: config: - {os: windows-latest, r: 'release'} - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } - + - {os: ubuntu-20.04, r: 'release'} + - {os: ubuntu-20.04, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-20.04, r: 'oldrel-1'} env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - + R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v1 + - uses: actions/checkout@v3 + - uses: r-lib/actions/setup-pandoc@v2 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Restore R package cache - uses: actions/cache@v2 + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | - options(crayon.enabled = TRUE) - rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + extra-packages: any::rcmdcheck + needs: check + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + upload-snapshots: true diff --git a/DESCRIPTION b/DESCRIPTION index c6af5450..be711ecf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesMallows Type: Package Title: Bayesian Preference Learning with the Mallows Rank Model -Version: 1.2.1 +Version: 1.2.1.9003 Authors@R: c(person("Oystein", "Sorensen", email = "oystein.sorensen.1985@gmail.com", role = c("aut", "cre"), @@ -42,7 +42,7 @@ URL: https://github.com/ocbe-uio/BayesMallows License: GPL-3 Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 Depends: R (>= 3.5.0) Imports: Rcpp (>= 1.0.0), ggplot2 (>= 3.1.0), diff --git a/NAMESPACE b/NAMESPACE index 74f478ee..25b2ad8d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(compute_consensus,BayesMallows) S3method(compute_posterior_intervals,BayesMallows) S3method(compute_posterior_intervals,SMCMallows) S3method(plot,BayesMallows) +S3method(plot,SMCMallows) S3method(print,BayesMallows) S3method(print,BayesMallowsMixtures) export(assess_convergence) diff --git a/NEWS.md b/NEWS.md index 3f3e4aa1..0005e5df 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,12 @@ +# BayesMallows (development versions) + +* Added `plot.SMCMallows()` method +* Changed default values and argument order on several SMC functions (see PR #269) + # BayesMallows 1.2.1 * PerMallows package has been removed from Imports because it is at risk of - being removed from CRAN. This means that for Ulam distance with more than + being removed from CRAN. This means that for Ulam distance with more than 95 items, the user will have to compute an importance sampling estimate. * Refactoring of data augmentation function for SMC Mallows. * Improved documentation of `sample_dataset` diff --git a/R/RcppExports.R b/R/RcppExports.R index 7c3faafb..40d9022f 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -196,7 +196,7 @@ run_mcmc <- function(rankings, obs_freq, nmc, constraints, cardinalities, logz_e #' @return backward_auxiliary_ranking_probability A numerical value of creating the previous augmented ranking using the same item ordering used to create the #' new augmented ranking in calculate_forward_probability function. #' @export -calculate_backward_probability <- function(item_ordering, partial_ranking, current_ranking, remaining_set, rho, alpha, n_items, metric) { +calculate_backward_probability <- function(item_ordering, partial_ranking, current_ranking, remaining_set, rho, alpha, n_items, metric = "footrule") { .Call(`_BayesMallows_calculate_backward_probability`, item_ordering, partial_ranking, current_ranking, remaining_set, rho, alpha, n_items, metric) } @@ -224,7 +224,7 @@ calculate_backward_probability <- function(item_ordering, partial_ranking, curre #' proposed augmented ranking and forward_prob a numerical value of the #' probability of creating the augmented ranking using the pseudolikelihood #' augmentation. -calculate_forward_probability <- function(item_ordering, partial_ranking, remaining_set, rho, alpha, n_items, metric) { +calculate_forward_probability <- function(item_ordering, partial_ranking, remaining_set, rho, alpha, n_items, metric = "footrule") { .Call(`_BayesMallows_calculate_forward_probability`, item_ordering, partial_ranking, remaining_set, rho, alpha, n_items, metric) } @@ -262,7 +262,7 @@ correction_kernel <- function(observed_ranking, current_ranking, n_items) { #' \code{"ulam"}. #' @return list containing R_obs, the proposed 'corrected' augmented ranking that is compatible with the new observed ranking for a user, and #' forward_auxiliary_ranking_probability, a numerical value for the probability of correcting the ranking to be compatible with R_obs. -correction_kernel_pseudo <- function(current_ranking, observed_ranking, rho, alpha, n_items, metric) { +correction_kernel_pseudo <- function(current_ranking, observed_ranking, rho, alpha, n_items, metric = "footrule") { .Call(`_BayesMallows_correction_kernel_pseudo`, current_ranking, observed_ranking, rho, alpha, n_items, metric) } @@ -311,7 +311,7 @@ correction_kernel_pseudo <- function(current_ranking, observed_ranking, rho, alp #' alpha = alpha, rho = rho, n_items = n_items, rankings = rankings , #' metric = metric #' ) -get_exponent_sum <- function(alpha, rho, n_items, rankings, metric) { +get_exponent_sum <- function(alpha, rho, n_items, rankings, metric = "footrule") { .Call(`_BayesMallows_get_exponent_sum`, alpha, rho, n_items, rankings, metric) } @@ -330,8 +330,8 @@ get_exponent_sum <- function(alpha, rho, n_items, rankings, metric) { #' @return sample_prob_list A numeric sequence of sample probabilities for selecting a specific rank given the current #' rho_item_rank #' @export -get_sample_probabilities <- function(rho_item_rank, alpha, remaining_set_ranks, metric, n_items) { - .Call(`_BayesMallows_get_sample_probabilities`, rho_item_rank, alpha, remaining_set_ranks, metric, n_items) +get_sample_probabilities <- function(rho_item_rank, alpha, remaining_set_ranks, n_items, metric = "footrule") { + .Call(`_BayesMallows_get_sample_probabilities`, rho_item_rank, alpha, remaining_set_ranks, n_items, metric) } #' @title Leap and Shift Probabilities @@ -353,12 +353,12 @@ get_sample_probabilities <- function(rho_item_rank, alpha, remaining_set_ranks, #' rho <- c(1, 2, 3, 4, 5, 6) #' n_items <- 6 #' -#' leap_and_shift_probs(rho, 1, n_items) -#' leap_and_shift_probs(rho, 2, n_items) -#' leap_and_shift_probs(rho, 3, n_items) +#' leap_and_shift_probs(rho, n_items, 1) +#' leap_and_shift_probs(rho, n_items, 2) +#' leap_and_shift_probs(rho, n_items, 3) #' -leap_and_shift_probs <- function(rho, leap_size, n_items) { - .Call(`_BayesMallows_leap_and_shift_probs`, rho, leap_size, n_items) +leap_and_shift_probs <- function(rho, n_items, leap_size = 1L) { + .Call(`_BayesMallows_leap_and_shift_probs`, rho, n_items, leap_size) } #' @title SMC-Mallows new users rank @@ -389,8 +389,8 @@ leap_and_shift_probs <- function(rho, leap_size, n_items) { #' @param alpha numeric value of the scale parameter. #' @return a 3d matrix containing: the samples of: rho, alpha and the augmented rankings, and the effective sample size at each iteration of the SMC algorithm. #' @export -smc_mallows_new_item_rank <- function(n_items, R_obs, metric, leap_size, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init = NULL, rho_samples_init = NULL, alpha_samples_init = 0L, alpha = 0, alpha_prop_sd = 1, lambda = 1, alpha_max = 1, aug_method = "random", verbose = FALSE, alpha_fixed = FALSE) { - .Call(`_BayesMallows_smc_mallows_new_item_rank`, n_items, R_obs, metric, leap_size, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init, rho_samples_init, alpha_samples_init, alpha, alpha_prop_sd, lambda, alpha_max, aug_method, verbose, alpha_fixed) +smc_mallows_new_item_rank <- function(n_items, R_obs, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init = NULL, rho_samples_init = NULL, alpha_samples_init = 0L, alpha = 0, alpha_prop_sd = 0.5, lambda = 0.1, alpha_max = 1e6, aug_method = "random", verbose = FALSE, alpha_fixed = FALSE, metric = "footrule", leap_size = 1L) { + .Call(`_BayesMallows_smc_mallows_new_item_rank`, n_items, R_obs, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init, rho_samples_init, alpha_samples_init, alpha, alpha_prop_sd, lambda, alpha_max, aug_method, verbose, alpha_fixed, metric, leap_size) } #' @title SMC-Mallows New Users @@ -438,8 +438,8 @@ smc_mallows_new_item_rank <- function(n_items, R_obs, metric, leap_size, N, Time #' #' @example inst/examples/smc_mallows_new_users_complete_example.R #' -smc_mallows_new_users <- function(R_obs, type, n_items, metric, leap_size, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd = 1, lambda = 1, alpha_max = 1, alpha = 0, aug_method = "random", logz_estimate = NULL, verbose = FALSE) { - .Call(`_BayesMallows_smc_mallows_new_users`, R_obs, type, n_items, metric, leap_size, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, alpha, aug_method, logz_estimate, verbose) +smc_mallows_new_users <- function(R_obs, type, n_items, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd = 0.5, lambda = 0.1, alpha_max = 1e6, alpha = 0, aug_method = "random", logz_estimate = NULL, verbose = FALSE, metric = "footnote", leap_size = 1L) { + .Call(`_BayesMallows_smc_mallows_new_users`, R_obs, type, n_items, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, alpha, aug_method, logz_estimate, verbose, metric, leap_size) } #' @title Metropolis-Hastings Alpha @@ -475,8 +475,8 @@ smc_mallows_new_users <- function(R_obs, type, n_items, metric, leap_size, N, Ti #' @example /inst/examples/metropolis_hastings_alpha_example.R #' #' @export -metropolis_hastings_alpha <- function(alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd, lambda, alpha_max) { - .Call(`_BayesMallows_metropolis_hastings_alpha`, alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd, lambda, alpha_max) +metropolis_hastings_alpha <- function(alpha, n_items, rankings, rho, logz_estimate, metric = "footrule", alpha_prop_sd = 0.5, alpha_max = 1e6, lambda = 0.1) { + .Call(`_BayesMallows_metropolis_hastings_alpha`, alpha, n_items, rankings, rho, logz_estimate, metric, alpha_prop_sd, alpha_max, lambda) } #' @title Metropolis-Hastings Augmented Ranking @@ -495,8 +495,8 @@ metropolis_hastings_alpha <- function(alpha, n_items, rankings, metric, rho, log #' @return R_curr or R_obs A ranking sequence vector representing proposed augmented ranking for next iteration of MCMC chain #' @export #' @keywords internal -metropolis_hastings_aug_ranking <- function(alpha, rho, n_items, partial_ranking, current_ranking, metric, pseudo) { - .Call(`_BayesMallows_metropolis_hastings_aug_ranking`, alpha, rho, n_items, partial_ranking, current_ranking, metric, pseudo) +metropolis_hastings_aug_ranking <- function(alpha, rho, n_items, partial_ranking, current_ranking, pseudo, metric = "footnote") { + .Call(`_BayesMallows_metropolis_hastings_aug_ranking`, alpha, rho, n_items, partial_ranking, current_ranking, pseudo, metric) } #' @title Metropolis-Hastings Rho @@ -535,7 +535,7 @@ metropolis_hastings_aug_ranking <- function(alpha, rho, n_items, partial_ranking #' rho = rho, leap_size = 1 #' ) #' -metropolis_hastings_rho <- function(alpha, n_items, rankings, metric, rho, leap_size) { - .Call(`_BayesMallows_metropolis_hastings_rho`, alpha, n_items, rankings, metric, rho, leap_size) +metropolis_hastings_rho <- function(alpha, n_items, rankings, rho, metric = "footnote", leap_size = 1L) { + .Call(`_BayesMallows_metropolis_hastings_rho`, alpha, n_items, rankings, rho, metric, leap_size) } diff --git a/R/plot.SMCMallows.R b/R/plot.SMCMallows.R new file mode 100644 index 00000000..115b4d81 --- /dev/null +++ b/R/plot.SMCMallows.R @@ -0,0 +1,103 @@ +#' @title Plot SMC Posterior Distributions +#' @description Plot posterior distributions of SMC-Mallow parameters. +#' @param x An object of type \code{SMC-Mallows}, returned for example from +#' \code{\link{smc_mallows_new_users}}. +#' @param nmc Number of Monte Carlo samples +#' @param burnin A numeric value specifying the number of iterations +#' to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be +#' provided if \code{model_fit$burnin} does not exist. See +#' \code{\link{assess_convergence}}. +#' @param parameter Character string defining the parameter to plot. Available +#' options are \code{"alpha"} and \code{"rho"}. +#' @param time Integer determining the update slice to plot +#' @param C Number of cluster +#' @param colnames A vector of item names. If NULL, generic names are generated +#' for the items in the ranking. +#' @param items Either a vector of item names, or a vector of indices. If NULL, +#' five items are selected randomly. +#' @param ... Other arguments passed to \code{\link[base]{plot}} (not used). +#' @return A plot of the posterior distributions +#' @author Waldir Leoncio +#' @export +#' @example /inst/examples/plot.SMCMallows_example.R +plot.SMCMallows <- function(x, nmc = nrow(x$rho_samples[, 1, ]), burnin = 0, + parameter = "alpha", time = ncol(x$rho_samples[, 1, ]), C = 1, + colnames = NULL, items = NULL, ...) { + + if (parameter == "alpha") { + output <- x$alpha_samples[, time] + plot_alpha_smc(output, nmc, burnin) + } else if (parameter == "rho") { + output <- x$rho_samples[, , time] + plot_rho_smc(output, nmc, burnin, C, colnames, items) + } else { + stop("parameter must be either 'alpha' or 'rho'.") + } +} + +plot_alpha_smc <- function(output, nmc, burnin) { + alpha_samples_table <- data.frame(iteration = 1:nmc, value = output) + + plot_posterior_alpha <- ggplot2::ggplot(alpha_samples_table, ggplot2::aes_(x = ~value)) + + ggplot2::geom_density() + + ggplot2::xlab(expression(alpha)) + + ggplot2::ylab("Posterior density") + + ggplot2::ggtitle(label = "Implemented SMC scheme") + + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) + + print(plot_posterior_alpha) +} + +plot_rho_smc <- function(output, nmc, burnin, C, colnames = NULL, items = NULL) { + n_items <- dim(output)[2] + + if (is.null(items) && n_items > 5) { + message("Items not provided by user or more than 5 items in a ranking. Picking 5 at random.") + items <- sample(seq_len(n_items), 5, replace = FALSE) + items <- sort(items) + } else if (is.null(items) && n_items <= 5) { + items <- seq_len(n_items) + items <- sort(items) + } + + # do smc processing here + smc_plot <- smc_processing(output = output, colnames = colnames) + + if (!is.character(items)) { + items <- unique(smc_plot$item)[items] + } + + iteration <- rep(seq_len(nmc), times = n_items) + df <- cbind(iteration, smc_plot) + + if (C == 1) { + df <- cbind(cluster = "Cluster 1", df) + } + + df <- df[df$iteration > burnin & df$item %in% items, , drop = FALSE] + + # Compute the density, rather than the count, since the latter + # depends on the number of Monte Carlo samples + df <- aggregate(list(n = df$iteration), + list(cluster = df$cluster, item = df$item, value = df$value), + FUN = length + ) + df$pct <- df$n / sum(df$n) + + df$item <- factor(df$item, levels = c(items)) + + # Finally create the plot + p <- ggplot2::ggplot(df, ggplot2::aes(x = .data$value, y = .data$pct)) + + ggplot2::geom_col() + + ggplot2::scale_x_continuous(labels = scalefun) + + ggplot2::xlab("rank") + + ggplot2::ylab("Posterior probability") + + if (C == 1) { + p <- p + ggplot2::facet_wrap(~ .data$item) + } else { + p <- p + ggplot2::facet_wrap(~ .data$cluster + .data$item) + } + + return(p) +} diff --git a/R/smc_mallows_deprecated.R b/R/smc_mallows_deprecated.R index de0d1280..7bce3fe7 100644 --- a/R/smc_mallows_deprecated.R +++ b/R/smc_mallows_deprecated.R @@ -7,9 +7,9 @@ smc_mallows_new_users_partial <- function( ) { .Deprecated("smc_mallows_new_users") smc_mallows_new_users( - R_obs, "partial", n_items, metric, leap_size, N, Time, mcmc_kernel_app, + R_obs, "partial", n_items, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, 0, aug_method, logz_estimate, - verbose + verbose, metric, leap_size ) } @@ -22,9 +22,9 @@ smc_mallows_new_users_complete <- function( ) { .Deprecated("smc_mallows_new_users") smc_mallows_new_users( - R_obs, "complete", n_items, metric, leap_size, N, Time, mcmc_kernel_app, + R_obs, "complete", n_items, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, 0, "random", - logz_estimate, verbose + logz_estimate, verbose, metric, leap_size ) } @@ -36,9 +36,10 @@ smc_mallows_new_users_partial_alpha_fixed <- function( num_new_obs, aug_method, alpha) { .Deprecated("smc_mallows_new_users") smc_mallows_new_users( - R_obs, "partial_alpha_fixed", n_items, metric, leap_size, N, Time, + R_obs, "partial_alpha_fixed", n_items, N, Time, mcmc_kernel_app, num_new_obs, 1, 1, 1, alpha, aug_method, logz_estimate, - FALSE) + FALSE, metric, leap_size + ) } #' @describeIn smc_mallows_new_item_rank Deprecated function for @@ -52,10 +53,10 @@ smc_mallows_new_item_rank_alpha_fixed <- function( ){ .Deprecated("smc_mallows_new_item_rank") smc_mallows_new_item_rank( - n_items, R_obs, metric, leap_size, N, Time, logz_estimate, mcmc_kernel_app, + n_items, R_obs, N, Time, logz_estimate, mcmc_kernel_app, alpha = alpha, alpha_prop_sd = alpha_prop_sd, lambda = lambda, alpha_max = alpha_max, aug_method = aug_method, verbose = verbose, - alpha_fixed = TRUE + alpha_fixed = TRUE, metric = metric, leap_size = leap_size ) } @@ -68,5 +69,23 @@ metropolis_hastings_aug_ranking_pseudo <- function( ) { .Deprecated("metropolis_hastings_aug_ranking") return(metropolis_hastings_aug_ranking( - alpha, rho, n_items, partial_ranking, current_ranking, metric, TRUE)) + alpha, rho, n_items, partial_ranking, current_ranking, TRUE, metric)) +} + +#' @describeIn plot.SMCMallows Deprecated function for +#' \code{plot_alpha_posterior}. +#' @export +#' @keywords internal +plot_alpha_posterior <- function(output, nmc, burnin) { + .Deprecated("plot") +} + +#' @describeIn plot.SMCMallows Deprecated function for +#' \code{plot_rho_posterior}. +#' @export +#' @keywords internal +plot_rho_posterior <- function( + output, nmc, burnin, C, colnames = NULL, items = NULL +) { + .Deprecated("plot") } diff --git a/R/smc_post_processing_functions.R b/R/smc_post_processing_functions.R index dab42313..c17fc352 100644 --- a/R/smc_post_processing_functions.R +++ b/R/smc_post_processing_functions.R @@ -4,7 +4,6 @@ #' @author Anja Stein #' @param output input #' @param colnames colnames -# AS: edited this function to include parameter `colnames`. This resolve issues in #118 with post processing functions not printing the names of items in rankings. # The `default` is set to NULL so tat we do not cause plotting issues in `plot_rho_heatplot. smc_processing <- function(output, colnames = NULL) { df <- data.frame(data = output) @@ -39,8 +38,7 @@ smc_processing <- function(output, colnames = NULL) { #' @inheritParams smc_processing #' @param nmc Number of Monte Carlo samples #' @param burnin A numeric value specifying the number of iterations -#' to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be -#' provided if \code{model_fit$burnin} does not exist. See \code{\link{assess_convergence}}. +#' to discard as burn-in. #' @param verbose if \code{TRUE}, prints the final output even if the function #' is assigned to an object. Defaults to \code{FALSE}. #' @export @@ -82,7 +80,7 @@ compute_posterior_intervals_rho <- function(output, nmc, burnin, colnames = NULL #' @author Anja Stein #' # AS: added an extra inout variable `colnames`. This is called in the function `smc_processing`. -compute_rho_consensus <- function(output, nmc, burnin, C, type, colnames = NULL, verbose = FALSE) { +compute_rho_consensus <- function(output, nmc, burnin, C, type = "CP", colnames = NULL, verbose = FALSE) { n_items <- dim(output)[2] #---------------------------------------------------------------- @@ -115,29 +113,6 @@ compute_rho_consensus <- function(output, nmc, burnin, C, type, colnames = NULL, return(results) } -#' @title Plot Alpha Posterior -#' @description posterior for alpha -#' @inheritParams compute_posterior_intervals_rho -#' @export -#' @author Anja Stein -#' -# AS: if you remove the verbose input variable, then the function will be consistent -# with the other plot functions(they all print when verbose=FALSE, but this function doesn't.) -# `plot_rho_heatplot` doesn't require the variable `verbose`, -# so I'm not sure if this function does to plot the density of alpha -plot_alpha_posterior <- function(output, nmc, burnin) { - alpha_samples_table <- data.frame(iteration = 1:nmc, value = output) - - plot_posterior_alpha <- ggplot2::ggplot(alpha_samples_table, ggplot2::aes_(x = ~value)) + - ggplot2::geom_density() + - ggplot2::xlab(expression(alpha)) + - ggplot2::ylab("Posterior density") + - ggplot2::ggtitle(label = "Implemented SMC scheme") + - ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) - - print(plot_posterior_alpha) -} - #' @title Compute Posterior Intervals Alpha #' @description posterior confidence intervals #' @inheritParams compute_posterior_intervals_rho @@ -157,71 +132,3 @@ compute_posterior_intervals_alpha <- function(output, nmc, burnin, verbose = FAL if (verbose) print(alpha_mixture_posterior_interval) return(alpha_mixture_posterior_interval) } - -#' @title Plot the posterior for rho for each item -#' @param output input -#' @param nmc Number of Monte Carlo samples -#' @param burnin A numeric value specifying the number of iterations -#' to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be -#' provided if \code{model_fit$burnin} does not exist. See \code{\link{assess_convergence}} -#' @param C Number of cluster -#' @param colnames A vector of item names. If NULL, we generate generic names for the items in the ranking. -#' @param items Either a vector of item names, or a -#' vector of indices. If NULL, five items are selected randomly. -#' @export -plot_rho_posterior <- function(output, nmc, burnin, C, colnames = NULL, items = NULL) { - n_items <- dim(output)[2] - - if (is.null(items) && n_items > 5) { - message("Items not provided by user or more than 5 items in a ranking. Picking 5 at random.") - items <- sample(seq_len(n_items), 5, replace = FALSE) - items <- sort(items) - } else if (is.null(items) && n_items <= 5) { - items <- seq_len(n_items) - items <- sort(items) - } - - # do smc processing here - smc_plot <- smc_processing(output = output, colnames = colnames) - - if (!is.character(items)) { - items <- unique(smc_plot$item)[items] - } - - iteration <- rep(seq_len(nmc), times = n_items) - df <- cbind(iteration, smc_plot) - - if (C == 1) { - df <- cbind(cluster = "Cluster 1", df) - } - - df <- df[df$iteration > burnin & df$item %in% items, , drop = FALSE] - - # Compute the density, rather than the count, since the latter - # depends on the number of Monte Carlo samples - df <- aggregate(list(n = df$iteration), - list(cluster = df$cluster, item = df$item, value = df$value), - FUN = length - ) - df$pct <- df$n / sum(df$n) - - df$item <- factor(df$item, levels = c(items)) - - # Taken from misc.R function in BayesMallows - scalefun <- function(x) sprintf("%d", as.integer(x)) - - # Finally create the plot - p <- ggplot2::ggplot(df, ggplot2::aes(x = .data$value, y = .data$pct)) + - ggplot2::geom_col() + - ggplot2::scale_x_continuous(labels = scalefun) + - ggplot2::xlab("rank") + - ggplot2::ylab("Posterior probability") - - if (C == 1) { - p <- p + ggplot2::facet_wrap(~ .data$item) - } else { - p <- p + ggplot2::facet_wrap(~ .data$cluster + .data$item) - } - - return(p) -} diff --git a/inst/examples/metropolis_hastings_alpha_example.R b/inst/examples/metropolis_hastings_alpha_example.R index 3d182558..c1f3acfc 100644 --- a/inst/examples/metropolis_hastings_alpha_example.R +++ b/inst/examples/metropolis_hastings_alpha_example.R @@ -17,21 +17,21 @@ logz_estimate <- estimate_partition_function( ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, - lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, alpha_prop_sd = 0.5, + lambda = 0.1, alpha_max = 20, metric ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20, metric ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, + alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20, metric ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20, metric ) diff --git a/inst/examples/plot.SMCMallows_example.R b/inst/examples/plot.SMCMallows_example.R new file mode 100644 index 00000000..f49f9c57 --- /dev/null +++ b/inst/examples/plot.SMCMallows_example.R @@ -0,0 +1,25 @@ +set.seed(994) + +n_items <- dim(sushi_rankings)[2] +metric <- "footrule" + +# Estimate the logarithm of the partition function of the Mallows rank model +logz_estimate <- estimate_partition_function( + method = "importance_sampling", + alpha_vector = seq(from = 0, to = 15, by = 0.5), n_items = n_items, + metric = metric, nmc = 1e2, degree = 10 +) + +# Perform the resample-move SMC algorithm +smc_test <- smc_mallows_new_users( + R_obs = sushi_rankings[1:100, ], type = "complete", n_items = n_items, + metric = metric, leap_size = floor(n_items / 5), N = 100, Time = 10, + logz_estimate = logz_estimate, mcmc_kernel_app = 5, num_new_obs = 5, + alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 1e3 +) + +# Plot rho +plot(smc_test, colnames = colnames(sushi_rankings), parameter = "rho") + +# Plot alpha +plot(smc_test, parameter = "alpha") diff --git a/man/calculate_backward_probability.Rd b/man/calculate_backward_probability.Rd index 788ada7e..fe49d0fb 100644 --- a/man/calculate_backward_probability.Rd +++ b/man/calculate_backward_probability.Rd @@ -12,7 +12,7 @@ calculate_backward_probability( rho, alpha, n_items, - metric + metric = "footrule" ) } \arguments{ diff --git a/man/calculate_forward_probability.Rd b/man/calculate_forward_probability.Rd index 21ba0f72..989dd6eb 100644 --- a/man/calculate_forward_probability.Rd +++ b/man/calculate_forward_probability.Rd @@ -11,7 +11,7 @@ calculate_forward_probability( rho, alpha, n_items, - metric + metric = "footrule" ) } \arguments{ diff --git a/man/compute_posterior_intervals_alpha.Rd b/man/compute_posterior_intervals_alpha.Rd index f6460482..da76ea29 100644 --- a/man/compute_posterior_intervals_alpha.Rd +++ b/man/compute_posterior_intervals_alpha.Rd @@ -12,8 +12,7 @@ compute_posterior_intervals_alpha(output, nmc, burnin, verbose = FALSE) \item{nmc}{Number of Monte Carlo samples} \item{burnin}{A numeric value specifying the number of iterations -to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be -provided if \code{model_fit$burnin} does not exist. See \code{\link{assess_convergence}}.} +to discard as burn-in.} \item{verbose}{if \code{TRUE}, prints the final output even if the function is assigned to an object. Defaults to \code{FALSE}.} diff --git a/man/compute_posterior_intervals_rho.Rd b/man/compute_posterior_intervals_rho.Rd index 7148af1e..2cd638ea 100644 --- a/man/compute_posterior_intervals_rho.Rd +++ b/man/compute_posterior_intervals_rho.Rd @@ -18,8 +18,7 @@ compute_posterior_intervals_rho( \item{nmc}{Number of Monte Carlo samples} \item{burnin}{A numeric value specifying the number of iterations -to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be -provided if \code{model_fit$burnin} does not exist. See \code{\link{assess_convergence}}.} +to discard as burn-in.} \item{colnames}{colnames} diff --git a/man/compute_rho_consensus.Rd b/man/compute_rho_consensus.Rd index f657219d..4c917622 100644 --- a/man/compute_rho_consensus.Rd +++ b/man/compute_rho_consensus.Rd @@ -9,7 +9,7 @@ compute_rho_consensus( nmc, burnin, C, - type, + type = "CP", colnames = NULL, verbose = FALSE ) @@ -20,8 +20,7 @@ compute_rho_consensus( \item{nmc}{Number of Monte Carlo samples} \item{burnin}{A numeric value specifying the number of iterations -to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be -provided if \code{model_fit$burnin} does not exist. See \code{\link{assess_convergence}}.} +to discard as burn-in.} \item{C}{C} diff --git a/man/correction_kernel_pseudo.Rd b/man/correction_kernel_pseudo.Rd index 77ec7bdb..f9edde7c 100644 --- a/man/correction_kernel_pseudo.Rd +++ b/man/correction_kernel_pseudo.Rd @@ -10,7 +10,7 @@ correction_kernel_pseudo( rho, alpha, n_items, - metric + metric = "footrule" ) } \arguments{ diff --git a/man/get_exponent_sum.Rd b/man/get_exponent_sum.Rd index 69735088..6ba43cb9 100644 --- a/man/get_exponent_sum.Rd +++ b/man/get_exponent_sum.Rd @@ -4,7 +4,7 @@ \alias{get_exponent_sum} \title{Get exponent in Mallows log-likelihood} \usage{ -get_exponent_sum(alpha, rho, n_items, rankings, metric) +get_exponent_sum(alpha, rho, n_items, rankings, metric = "footrule") } \arguments{ \item{alpha}{Numeric value of the scale parameter} diff --git a/man/get_sample_probabilities.Rd b/man/get_sample_probabilities.Rd index 2e80ab4c..7eff302b 100644 --- a/man/get_sample_probabilities.Rd +++ b/man/get_sample_probabilities.Rd @@ -8,8 +8,8 @@ get_sample_probabilities( rho_item_rank, alpha, remaining_set_ranks, - metric, - n_items + n_items, + metric = "footrule" ) } \arguments{ @@ -19,12 +19,12 @@ get_sample_probabilities( \item{remaining_set_ranks}{A sequence of integer values of the set of possible ranks that we can assign the item} +\item{n_items}{Integer is the number of items in the consensus ranking} + \item{metric}{A character string specifying the distance metric to use in the Bayesian Mallows Model. Available options are \code{"footrule"}, \code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, and \code{"ulam"}.} - -\item{n_items}{Integer is the number of items in the consensus ranking} } \value{ sample_prob_list A numeric sequence of sample probabilities for selecting a specific rank given the current diff --git a/man/leap_and_shift_probs.Rd b/man/leap_and_shift_probs.Rd index 4ea1f995..9f7e1b01 100644 --- a/man/leap_and_shift_probs.Rd +++ b/man/leap_and_shift_probs.Rd @@ -4,15 +4,15 @@ \alias{leap_and_shift_probs} \title{Leap and Shift Probabilities} \usage{ -leap_and_shift_probs(rho, leap_size, n_items) +leap_and_shift_probs(rho, n_items, leap_size = 1L) } \arguments{ \item{rho}{A ranking sequence} +\item{n_items}{Integer is the number of items in a ranking} + \item{leap_size}{Integer specifying the step size of the leap-and-shift proposal distribution.} - -\item{n_items}{Integer is the number of items in a ranking} } \value{ A list containing: @@ -29,9 +29,9 @@ Calculates transition probabilities for proposing a new rho rho <- c(1, 2, 3, 4, 5, 6) n_items <- 6 -leap_and_shift_probs(rho, 1, n_items) -leap_and_shift_probs(rho, 2, n_items) -leap_and_shift_probs(rho, 3, n_items) +leap_and_shift_probs(rho, n_items, 1) +leap_and_shift_probs(rho, n_items, 2) +leap_and_shift_probs(rho, n_items, 3) } \keyword{internal} diff --git a/man/metropolis_hastings_alpha.Rd b/man/metropolis_hastings_alpha.Rd index 5f4aba33..02277e7d 100644 --- a/man/metropolis_hastings_alpha.Rd +++ b/man/metropolis_hastings_alpha.Rd @@ -8,12 +8,12 @@ metropolis_hastings_alpha( alpha, n_items, rankings, - metric, rho, logz_estimate, - alpha_prop_sd, - lambda, - alpha_max + metric = "footrule", + alpha_prop_sd = 0.5, + alpha_max = 1e+06, + lambda = 0.1 ) } \arguments{ @@ -23,28 +23,28 @@ metropolis_hastings_alpha( \item{rankings}{the observed rankings, i.e, preference data} -\item{metric}{A character string specifying the distance metric to use -in the Bayesian Mallows Model. Available options are \code{"footrule"}, -\code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, -and \code{"ulam"}.} - \item{rho}{Numeric vector specifying the current consensus ranking} \item{logz_estimate}{Estimate grid of log of partition function, computed with \code{\link{estimate_partition_function}} in the BayesMallow R package {estimate_partition_function}.} +\item{metric}{A character string specifying the distance metric to use +in the Bayesian Mallows Model. Available options are \code{"footrule"}, +\code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, +and \code{"ulam"}.} + \item{alpha_prop_sd}{Numeric value specifying the standard deviation of the lognormal proposal distribution used for \eqn{\alpha} in the Metropolis-Hastings algorithm. Defaults to \code{0.1}.} +\item{alpha_max}{Maximum value of \code{alpha} in the truncated exponential +prior distribution.} + \item{lambda}{Strictly positive numeric value specifying the rate parameter of the truncated exponential prior distribution of \eqn{\alpha}. Defaults to \code{0.1}. When \code{n_cluster > 1}, each mixture component \eqn{\alpha_{c}} has the same prior distribution.} - -\item{alpha_max}{Maximum value of \code{alpha} in the truncated exponential -prior distribution.} } \value{ \code{alpha} or \code{alpha_prime}: Numeric value to be used @@ -79,23 +79,23 @@ logz_estimate <- estimate_partition_function( ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, - lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, alpha_prop_sd = 0.5, + lambda = 0.1, alpha_max = 20, metric ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20, metric ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, + alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20, metric ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20, metric ) } \author{ diff --git a/man/metropolis_hastings_aug_ranking.Rd b/man/metropolis_hastings_aug_ranking.Rd index 211b2dbd..5672bd00 100644 --- a/man/metropolis_hastings_aug_ranking.Rd +++ b/man/metropolis_hastings_aug_ranking.Rd @@ -11,8 +11,8 @@ metropolis_hastings_aug_ranking( n_items, partial_ranking, current_ranking, - metric, - pseudo + pseudo, + metric = "footnote" ) metropolis_hastings_aug_ranking_pseudo( @@ -35,12 +35,12 @@ metropolis_hastings_aug_ranking_pseudo( \item{current_ranking}{An complete rank sequence vector of the proposed augmented ranking obtained from calculate_forward_probability function} +\item{pseudo}{Boolean specifying whether to use pseudo proposal or not.s} + \item{metric}{A character string specifying the distance metric to use in the Bayesian Mallows Model. Available options are \code{"footrule"}, \code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, and \code{"ulam"}.} - -\item{pseudo}{Boolean specifying whether to use pseudo proposal or not.s} } \value{ R_curr or R_obs A ranking sequence vector representing proposed augmented ranking for next iteration of MCMC chain diff --git a/man/metropolis_hastings_rho.Rd b/man/metropolis_hastings_rho.Rd index 4f69c4d6..661b45ef 100644 --- a/man/metropolis_hastings_rho.Rd +++ b/man/metropolis_hastings_rho.Rd @@ -4,7 +4,14 @@ \alias{metropolis_hastings_rho} \title{Metropolis-Hastings Rho} \usage{ -metropolis_hastings_rho(alpha, n_items, rankings, metric, rho, leap_size) +metropolis_hastings_rho( + alpha, + n_items, + rankings, + rho, + metric = "footnote", + leap_size = 1L +) } \arguments{ \item{alpha}{Numeric value of the scale parameter} @@ -18,12 +25,12 @@ can be a vector.} rankings in each row. Alternatively, if \eqn{N} equals 1, \code{rankings} can be a vector.} +\item{rho}{A ranking sequence} + \item{metric}{Character string specifying the distance measure to use. Available options are \code{"kendall"}, \code{"cayley"}, \code{"hamming"}, \code{"ulam"}, \code{"footrule"} and \code{"spearman"}.} -\item{rho}{A ranking sequence} - \item{leap_size}{Integer specifying the step size of the leap-and-shift proposal distribution.} } diff --git a/man/plot.SMCMallows.Rd b/man/plot.SMCMallows.Rd new file mode 100644 index 00000000..dad81995 --- /dev/null +++ b/man/plot.SMCMallows.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.SMCMallows.R, R/smc_mallows_deprecated.R +\name{plot.SMCMallows} +\alias{plot.SMCMallows} +\alias{plot_alpha_posterior} +\alias{plot_rho_posterior} +\title{Plot SMC Posterior Distributions} +\usage{ +\method{plot}{SMCMallows}( + x, + nmc = nrow(x$rho_samples[, 1, ]), + burnin = 0, + parameter = "alpha", + time = ncol(x$rho_samples[, 1, ]), + C = 1, + colnames = NULL, + items = NULL, + ... +) + +plot_alpha_posterior(output, nmc, burnin) + +plot_rho_posterior(output, nmc, burnin, C, colnames = NULL, items = NULL) +} +\arguments{ +\item{x}{An object of type \code{SMC-Mallows}, returned for example from +\code{\link{smc_mallows_new_users}}.} + +\item{nmc}{Number of Monte Carlo samples} + +\item{burnin}{A numeric value specifying the number of iterations +to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be +provided if \code{model_fit$burnin} does not exist. See +\code{\link{assess_convergence}}.} + +\item{parameter}{Character string defining the parameter to plot. Available +options are \code{"alpha"} and \code{"rho"}.} + +\item{time}{Integer determining the update slice to plot} + +\item{C}{Number of cluster} + +\item{colnames}{A vector of item names. If NULL, generic names are generated +for the items in the ranking.} + +\item{items}{Either a vector of item names, or a vector of indices. If NULL, +five items are selected randomly.} + +\item{...}{Other arguments passed to \code{\link[base]{plot}} (not used).} +} +\value{ +A plot of the posterior distributions +} +\description{ +Plot posterior distributions of SMC-Mallow parameters. +} +\section{Functions}{ +\itemize{ +\item \code{plot_alpha_posterior()}: Deprecated function for +\code{plot_alpha_posterior}. + +\item \code{plot_rho_posterior()}: Deprecated function for +\code{plot_rho_posterior}. + +}} +\examples{ +set.seed(994) + +n_items <- dim(sushi_rankings)[2] +metric <- "footrule" + +# Estimate the logarithm of the partition function of the Mallows rank model +logz_estimate <- estimate_partition_function( + method = "importance_sampling", + alpha_vector = seq(from = 0, to = 15, by = 0.5), n_items = n_items, + metric = metric, nmc = 1e2, degree = 10 +) + +# Perform the resample-move SMC algorithm +smc_test <- smc_mallows_new_users( + R_obs = sushi_rankings[1:100, ], type = "complete", n_items = n_items, + metric = metric, leap_size = floor(n_items / 5), N = 100, Time = 10, + logz_estimate = logz_estimate, mcmc_kernel_app = 5, num_new_obs = 5, + alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 1e3 +) + +# Plot rho +plot(smc_test, colnames = colnames(sushi_rankings), parameter = "rho") + +# Plot alpha +plot(smc_test, parameter = "alpha") +} +\author{ +Waldir Leoncio +} +\keyword{internal} diff --git a/man/plot_alpha_posterior.Rd b/man/plot_alpha_posterior.Rd deleted file mode 100644 index 482c7561..00000000 --- a/man/plot_alpha_posterior.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/smc_post_processing_functions.R -\name{plot_alpha_posterior} -\alias{plot_alpha_posterior} -\title{Plot Alpha Posterior} -\usage{ -plot_alpha_posterior(output, nmc, burnin) -} -\arguments{ -\item{output}{input} - -\item{nmc}{Number of Monte Carlo samples} - -\item{burnin}{A numeric value specifying the number of iterations -to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be -provided if \code{model_fit$burnin} does not exist. See \code{\link{assess_convergence}}.} -} -\description{ -posterior for alpha -} -\author{ -Anja Stein -} diff --git a/man/plot_rho_posterior.Rd b/man/plot_rho_posterior.Rd deleted file mode 100644 index f81e962b..00000000 --- a/man/plot_rho_posterior.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/smc_post_processing_functions.R -\name{plot_rho_posterior} -\alias{plot_rho_posterior} -\title{Plot the posterior for rho for each item} -\usage{ -plot_rho_posterior(output, nmc, burnin, C, colnames = NULL, items = NULL) -} -\arguments{ -\item{output}{input} - -\item{nmc}{Number of Monte Carlo samples} - -\item{burnin}{A numeric value specifying the number of iterations -to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be -provided if \code{model_fit$burnin} does not exist. See \code{\link{assess_convergence}}} - -\item{C}{Number of cluster} - -\item{colnames}{A vector of item names. If NULL, we generate generic names for the items in the ranking.} - -\item{items}{Either a vector of item names, or a -vector of indices. If NULL, five items are selected randomly.} -} -\description{ -Plot the posterior for rho for each item -} diff --git a/man/smc_mallows_new_item_rank.Rd b/man/smc_mallows_new_item_rank.Rd index 95630095..03b148e5 100644 --- a/man/smc_mallows_new_item_rank.Rd +++ b/man/smc_mallows_new_item_rank.Rd @@ -8,8 +8,6 @@ smc_mallows_new_item_rank( n_items, R_obs, - metric, - leap_size, N, Time, logz_estimate, @@ -18,12 +16,14 @@ smc_mallows_new_item_rank( rho_samples_init = NULL, alpha_samples_init = 0L, alpha = 0, - alpha_prop_sd = 1, - lambda = 1, - alpha_max = 1, + alpha_prop_sd = 0.5, + lambda = 0.1, + alpha_max = 1e+06, aug_method = "random", verbose = FALSE, - alpha_fixed = FALSE + alpha_fixed = FALSE, + metric = "footrule", + leap_size = 1L ) smc_mallows_new_item_rank_alpha_fixed( @@ -48,14 +48,6 @@ smc_mallows_new_item_rank_alpha_fixed( \item{R_obs}{3D matrix of size n_assessors by n_items by Time containing a set of observed rankings of Time time steps} -\item{metric}{A character string specifying the distance metric to use in the -Bayesian Mallows Model. Available options are \code{"footrule"}, -\code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, and -\code{"ulam"}.} - -\item{leap_size}{leap_size Integer specifying the step size of the leap-and-shift -proposal distribution} - \item{N}{Integer specifying the number of particles} \item{Time}{Integer specifying the number of time steps in the SMC algorithm} @@ -81,6 +73,14 @@ prior distribution.} SMC-Mallows algorithm. Defaults to \code{FALSE}.} \item{alpha_fixed}{Logical indicating whether to sample \code{alpha} or not.} + +\item{metric}{A character string specifying the distance metric to use in the +Bayesian Mallows Model. Available options are \code{"footrule"}, +\code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, and +\code{"ulam"}.} + +\item{leap_size}{leap_size Integer specifying the step size of the leap-and-shift +proposal distribution} } \value{ a 3d matrix containing: the samples of: rho, alpha and the augmented rankings, and the effective sample size at each iteration of the SMC algorithm. diff --git a/man/smc_mallows_new_users.Rd b/man/smc_mallows_new_users.Rd index e90ca878..2d20005c 100644 --- a/man/smc_mallows_new_users.Rd +++ b/man/smc_mallows_new_users.Rd @@ -11,19 +11,19 @@ smc_mallows_new_users( R_obs, type, n_items, - metric, - leap_size, N, Time, mcmc_kernel_app, num_new_obs, - alpha_prop_sd = 1, - lambda = 1, - alpha_max = 1, + alpha_prop_sd = 0.5, + lambda = 0.1, + alpha_max = 1e+06, alpha = 0, aug_method = "random", logz_estimate = NULL, - verbose = FALSE + verbose = FALSE, + metric = "footnote", + leap_size = 1L ) smc_mallows_new_users_partial( @@ -82,14 +82,6 @@ n_assessors by n_items} \item{n_items}{Integer is the number of items in a ranking} -\item{metric}{A character string specifying the distance metric to use -in the Bayesian Mallows Model. Available options are \code{"footrule"}, -\code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, and -\code{"ulam"}.} - -\item{leap_size}{leap_size Integer specifying the step size of the -leap-and-shift proposal distribution} - \item{N}{Integer specifying the number of particles} \item{Time}{Integer specifying the number of time steps in the SMC algorithm} @@ -123,6 +115,14 @@ in the missing data, options are "pseudolikelihood" or "random".} \item{verbose}{Logical specifying whether to print out the progress of the SMC-Mallows algorithm. Defaults to \code{FALSE}.} + +\item{metric}{A character string specifying the distance metric to use +in the Bayesian Mallows Model. Available options are \code{"footrule"}, +\code{"spearman"}, \code{"cayley"}, \code{"hamming"}, \code{"kendall"}, and +\code{"ulam"}.} + +\item{leap_size}{leap_size Integer specifying the step size of the +leap-and-shift proposal distribution} } \value{ a set of particles each containing a value of rho and alpha diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 3fed3453..84a4119d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -246,43 +246,41 @@ BEGIN_RCPP END_RCPP } // get_sample_probabilities -arma::vec get_sample_probabilities(const arma::vec rho_item_rank, const double alpha, const arma::vec remaining_set_ranks, const std::string metric, const int n_items); -RcppExport SEXP _BayesMallows_get_sample_probabilities(SEXP rho_item_rankSEXP, SEXP alphaSEXP, SEXP remaining_set_ranksSEXP, SEXP metricSEXP, SEXP n_itemsSEXP) { +arma::vec get_sample_probabilities(const arma::vec rho_item_rank, const double alpha, const arma::vec remaining_set_ranks, const int n_items, const std::string metric); +RcppExport SEXP _BayesMallows_get_sample_probabilities(SEXP rho_item_rankSEXP, SEXP alphaSEXP, SEXP remaining_set_ranksSEXP, SEXP n_itemsSEXP, SEXP metricSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const arma::vec >::type rho_item_rank(rho_item_rankSEXP); Rcpp::traits::input_parameter< const double >::type alpha(alphaSEXP); Rcpp::traits::input_parameter< const arma::vec >::type remaining_set_ranks(remaining_set_ranksSEXP); - Rcpp::traits::input_parameter< const std::string >::type metric(metricSEXP); Rcpp::traits::input_parameter< const int >::type n_items(n_itemsSEXP); - rcpp_result_gen = Rcpp::wrap(get_sample_probabilities(rho_item_rank, alpha, remaining_set_ranks, metric, n_items)); + Rcpp::traits::input_parameter< const std::string >::type metric(metricSEXP); + rcpp_result_gen = Rcpp::wrap(get_sample_probabilities(rho_item_rank, alpha, remaining_set_ranks, n_items, metric)); return rcpp_result_gen; END_RCPP } // leap_and_shift_probs -Rcpp::List leap_and_shift_probs(const arma::vec rho, const int leap_size, const int n_items); -RcppExport SEXP _BayesMallows_leap_and_shift_probs(SEXP rhoSEXP, SEXP leap_sizeSEXP, SEXP n_itemsSEXP) { +Rcpp::List leap_and_shift_probs(const arma::vec rho, const int n_items, const int leap_size); +RcppExport SEXP _BayesMallows_leap_and_shift_probs(SEXP rhoSEXP, SEXP n_itemsSEXP, SEXP leap_sizeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const arma::vec >::type rho(rhoSEXP); - Rcpp::traits::input_parameter< const int >::type leap_size(leap_sizeSEXP); Rcpp::traits::input_parameter< const int >::type n_items(n_itemsSEXP); - rcpp_result_gen = Rcpp::wrap(leap_and_shift_probs(rho, leap_size, n_items)); + Rcpp::traits::input_parameter< const int >::type leap_size(leap_sizeSEXP); + rcpp_result_gen = Rcpp::wrap(leap_and_shift_probs(rho, n_items, leap_size)); return rcpp_result_gen; END_RCPP } // smc_mallows_new_item_rank -Rcpp::List smc_mallows_new_item_rank(const unsigned int& n_items, arma::cube& R_obs, const std::string& metric, const int& leap_size, const unsigned int& N, const unsigned int Time, const Rcpp::Nullable logz_estimate, const int& mcmc_kernel_app, Rcpp::Nullable aug_rankings_init, Rcpp::Nullable rho_samples_init, arma::vec alpha_samples_init, const double alpha, const double alpha_prop_sd, const double lambda, const double alpha_max, const std::string& aug_method, const bool verbose, const bool alpha_fixed); -RcppExport SEXP _BayesMallows_smc_mallows_new_item_rank(SEXP n_itemsSEXP, SEXP R_obsSEXP, SEXP metricSEXP, SEXP leap_sizeSEXP, SEXP NSEXP, SEXP TimeSEXP, SEXP logz_estimateSEXP, SEXP mcmc_kernel_appSEXP, SEXP aug_rankings_initSEXP, SEXP rho_samples_initSEXP, SEXP alpha_samples_initSEXP, SEXP alphaSEXP, SEXP alpha_prop_sdSEXP, SEXP lambdaSEXP, SEXP alpha_maxSEXP, SEXP aug_methodSEXP, SEXP verboseSEXP, SEXP alpha_fixedSEXP) { +Rcpp::List smc_mallows_new_item_rank(const unsigned int& n_items, arma::cube& R_obs, const unsigned int& N, const unsigned int Time, const Rcpp::Nullable logz_estimate, const int& mcmc_kernel_app, Rcpp::Nullable aug_rankings_init, Rcpp::Nullable rho_samples_init, arma::vec alpha_samples_init, const double alpha, const double alpha_prop_sd, const double lambda, const double alpha_max, const std::string& aug_method, const bool verbose, const bool alpha_fixed, const std::string& metric, const int& leap_size); +RcppExport SEXP _BayesMallows_smc_mallows_new_item_rank(SEXP n_itemsSEXP, SEXP R_obsSEXP, SEXP NSEXP, SEXP TimeSEXP, SEXP logz_estimateSEXP, SEXP mcmc_kernel_appSEXP, SEXP aug_rankings_initSEXP, SEXP rho_samples_initSEXP, SEXP alpha_samples_initSEXP, SEXP alphaSEXP, SEXP alpha_prop_sdSEXP, SEXP lambdaSEXP, SEXP alpha_maxSEXP, SEXP aug_methodSEXP, SEXP verboseSEXP, SEXP alpha_fixedSEXP, SEXP metricSEXP, SEXP leap_sizeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const unsigned int& >::type n_items(n_itemsSEXP); Rcpp::traits::input_parameter< arma::cube& >::type R_obs(R_obsSEXP); - Rcpp::traits::input_parameter< const std::string& >::type metric(metricSEXP); - Rcpp::traits::input_parameter< const int& >::type leap_size(leap_sizeSEXP); Rcpp::traits::input_parameter< const unsigned int& >::type N(NSEXP); Rcpp::traits::input_parameter< const unsigned int >::type Time(TimeSEXP); Rcpp::traits::input_parameter< const Rcpp::Nullable >::type logz_estimate(logz_estimateSEXP); @@ -297,21 +295,21 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const std::string& >::type aug_method(aug_methodSEXP); Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP); Rcpp::traits::input_parameter< const bool >::type alpha_fixed(alpha_fixedSEXP); - rcpp_result_gen = Rcpp::wrap(smc_mallows_new_item_rank(n_items, R_obs, metric, leap_size, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init, rho_samples_init, alpha_samples_init, alpha, alpha_prop_sd, lambda, alpha_max, aug_method, verbose, alpha_fixed)); + Rcpp::traits::input_parameter< const std::string& >::type metric(metricSEXP); + Rcpp::traits::input_parameter< const int& >::type leap_size(leap_sizeSEXP); + rcpp_result_gen = Rcpp::wrap(smc_mallows_new_item_rank(n_items, R_obs, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init, rho_samples_init, alpha_samples_init, alpha, alpha_prop_sd, lambda, alpha_max, aug_method, verbose, alpha_fixed, metric, leap_size)); return rcpp_result_gen; END_RCPP } // smc_mallows_new_users -Rcpp::List smc_mallows_new_users(const arma::mat& R_obs, const std::string& type, const int& n_items, const std::string& metric, const int& leap_size, const int& N, int Time, const int& mcmc_kernel_app, const int& num_new_obs, const double alpha_prop_sd, const double lambda, const double alpha_max, const double alpha, const std::string& aug_method, const Rcpp::Nullable& logz_estimate, const bool verbose); -RcppExport SEXP _BayesMallows_smc_mallows_new_users(SEXP R_obsSEXP, SEXP typeSEXP, SEXP n_itemsSEXP, SEXP metricSEXP, SEXP leap_sizeSEXP, SEXP NSEXP, SEXP TimeSEXP, SEXP mcmc_kernel_appSEXP, SEXP num_new_obsSEXP, SEXP alpha_prop_sdSEXP, SEXP lambdaSEXP, SEXP alpha_maxSEXP, SEXP alphaSEXP, SEXP aug_methodSEXP, SEXP logz_estimateSEXP, SEXP verboseSEXP) { +Rcpp::List smc_mallows_new_users(const arma::mat& R_obs, const std::string& type, const int& n_items, const int& N, int Time, const int& mcmc_kernel_app, const int& num_new_obs, const double alpha_prop_sd, const double lambda, const double alpha_max, const double alpha, const std::string& aug_method, const Rcpp::Nullable& logz_estimate, const bool verbose, const std::string& metric, const int& leap_size); +RcppExport SEXP _BayesMallows_smc_mallows_new_users(SEXP R_obsSEXP, SEXP typeSEXP, SEXP n_itemsSEXP, SEXP NSEXP, SEXP TimeSEXP, SEXP mcmc_kernel_appSEXP, SEXP num_new_obsSEXP, SEXP alpha_prop_sdSEXP, SEXP lambdaSEXP, SEXP alpha_maxSEXP, SEXP alphaSEXP, SEXP aug_methodSEXP, SEXP logz_estimateSEXP, SEXP verboseSEXP, SEXP metricSEXP, SEXP leap_sizeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const arma::mat& >::type R_obs(R_obsSEXP); Rcpp::traits::input_parameter< const std::string& >::type type(typeSEXP); Rcpp::traits::input_parameter< const int& >::type n_items(n_itemsSEXP); - Rcpp::traits::input_parameter< const std::string& >::type metric(metricSEXP); - Rcpp::traits::input_parameter< const int& >::type leap_size(leap_sizeSEXP); Rcpp::traits::input_parameter< const int& >::type N(NSEXP); Rcpp::traits::input_parameter< int >::type Time(TimeSEXP); Rcpp::traits::input_parameter< const int& >::type mcmc_kernel_app(mcmc_kernel_appSEXP); @@ -323,32 +321,34 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const std::string& >::type aug_method(aug_methodSEXP); Rcpp::traits::input_parameter< const Rcpp::Nullable& >::type logz_estimate(logz_estimateSEXP); Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP); - rcpp_result_gen = Rcpp::wrap(smc_mallows_new_users(R_obs, type, n_items, metric, leap_size, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, alpha, aug_method, logz_estimate, verbose)); + Rcpp::traits::input_parameter< const std::string& >::type metric(metricSEXP); + Rcpp::traits::input_parameter< const int& >::type leap_size(leap_sizeSEXP); + rcpp_result_gen = Rcpp::wrap(smc_mallows_new_users(R_obs, type, n_items, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, alpha, aug_method, logz_estimate, verbose, metric, leap_size)); return rcpp_result_gen; END_RCPP } // metropolis_hastings_alpha -double metropolis_hastings_alpha(const double alpha, const int n_items, const arma::mat rankings, const std::string metric, const arma::vec rho, const Rcpp::Nullable logz_estimate, const double alpha_prop_sd, const double lambda, const double alpha_max); -RcppExport SEXP _BayesMallows_metropolis_hastings_alpha(SEXP alphaSEXP, SEXP n_itemsSEXP, SEXP rankingsSEXP, SEXP metricSEXP, SEXP rhoSEXP, SEXP logz_estimateSEXP, SEXP alpha_prop_sdSEXP, SEXP lambdaSEXP, SEXP alpha_maxSEXP) { +double metropolis_hastings_alpha(const double alpha, const int n_items, const arma::mat rankings, const arma::vec rho, const Rcpp::Nullable logz_estimate, const std::string metric, const double alpha_prop_sd, const double alpha_max, const double lambda); +RcppExport SEXP _BayesMallows_metropolis_hastings_alpha(SEXP alphaSEXP, SEXP n_itemsSEXP, SEXP rankingsSEXP, SEXP rhoSEXP, SEXP logz_estimateSEXP, SEXP metricSEXP, SEXP alpha_prop_sdSEXP, SEXP alpha_maxSEXP, SEXP lambdaSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const double >::type alpha(alphaSEXP); Rcpp::traits::input_parameter< const int >::type n_items(n_itemsSEXP); Rcpp::traits::input_parameter< const arma::mat >::type rankings(rankingsSEXP); - Rcpp::traits::input_parameter< const std::string >::type metric(metricSEXP); Rcpp::traits::input_parameter< const arma::vec >::type rho(rhoSEXP); Rcpp::traits::input_parameter< const Rcpp::Nullable >::type logz_estimate(logz_estimateSEXP); + Rcpp::traits::input_parameter< const std::string >::type metric(metricSEXP); Rcpp::traits::input_parameter< const double >::type alpha_prop_sd(alpha_prop_sdSEXP); - Rcpp::traits::input_parameter< const double >::type lambda(lambdaSEXP); Rcpp::traits::input_parameter< const double >::type alpha_max(alpha_maxSEXP); - rcpp_result_gen = Rcpp::wrap(metropolis_hastings_alpha(alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd, lambda, alpha_max)); + Rcpp::traits::input_parameter< const double >::type lambda(lambdaSEXP); + rcpp_result_gen = Rcpp::wrap(metropolis_hastings_alpha(alpha, n_items, rankings, rho, logz_estimate, metric, alpha_prop_sd, alpha_max, lambda)); return rcpp_result_gen; END_RCPP } // metropolis_hastings_aug_ranking -arma::vec metropolis_hastings_aug_ranking(const double& alpha, const arma::vec& rho, const int& n_items, const arma::vec& partial_ranking, const arma::vec& current_ranking, const std::string& metric, const bool& pseudo); -RcppExport SEXP _BayesMallows_metropolis_hastings_aug_ranking(SEXP alphaSEXP, SEXP rhoSEXP, SEXP n_itemsSEXP, SEXP partial_rankingSEXP, SEXP current_rankingSEXP, SEXP metricSEXP, SEXP pseudoSEXP) { +arma::vec metropolis_hastings_aug_ranking(const double& alpha, const arma::vec& rho, const int& n_items, const arma::vec& partial_ranking, const arma::vec& current_ranking, const bool& pseudo, const std::string& metric); +RcppExport SEXP _BayesMallows_metropolis_hastings_aug_ranking(SEXP alphaSEXP, SEXP rhoSEXP, SEXP n_itemsSEXP, SEXP partial_rankingSEXP, SEXP current_rankingSEXP, SEXP pseudoSEXP, SEXP metricSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -357,25 +357,25 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const int& >::type n_items(n_itemsSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type partial_ranking(partial_rankingSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type current_ranking(current_rankingSEXP); - Rcpp::traits::input_parameter< const std::string& >::type metric(metricSEXP); Rcpp::traits::input_parameter< const bool& >::type pseudo(pseudoSEXP); - rcpp_result_gen = Rcpp::wrap(metropolis_hastings_aug_ranking(alpha, rho, n_items, partial_ranking, current_ranking, metric, pseudo)); + Rcpp::traits::input_parameter< const std::string& >::type metric(metricSEXP); + rcpp_result_gen = Rcpp::wrap(metropolis_hastings_aug_ranking(alpha, rho, n_items, partial_ranking, current_ranking, pseudo, metric)); return rcpp_result_gen; END_RCPP } // metropolis_hastings_rho -arma::vec metropolis_hastings_rho(const double alpha, const int n_items, const arma::mat rankings, const std::string metric, const arma::vec rho, const int leap_size); -RcppExport SEXP _BayesMallows_metropolis_hastings_rho(SEXP alphaSEXP, SEXP n_itemsSEXP, SEXP rankingsSEXP, SEXP metricSEXP, SEXP rhoSEXP, SEXP leap_sizeSEXP) { +arma::vec metropolis_hastings_rho(const double alpha, const int n_items, const arma::mat rankings, const arma::vec rho, const std::string metric, const int leap_size); +RcppExport SEXP _BayesMallows_metropolis_hastings_rho(SEXP alphaSEXP, SEXP n_itemsSEXP, SEXP rankingsSEXP, SEXP rhoSEXP, SEXP metricSEXP, SEXP leap_sizeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const double >::type alpha(alphaSEXP); Rcpp::traits::input_parameter< const int >::type n_items(n_itemsSEXP); Rcpp::traits::input_parameter< const arma::mat >::type rankings(rankingsSEXP); - Rcpp::traits::input_parameter< const std::string >::type metric(metricSEXP); Rcpp::traits::input_parameter< const arma::vec >::type rho(rhoSEXP); + Rcpp::traits::input_parameter< const std::string >::type metric(metricSEXP); Rcpp::traits::input_parameter< const int >::type leap_size(leap_sizeSEXP); - rcpp_result_gen = Rcpp::wrap(metropolis_hastings_rho(alpha, n_items, rankings, metric, rho, leap_size)); + rcpp_result_gen = Rcpp::wrap(metropolis_hastings_rho(alpha, n_items, rankings, rho, metric, leap_size)); return rcpp_result_gen; END_RCPP } diff --git a/src/smc.h b/src/smc.h index 4b6817f4..40250288 100644 --- a/src/smc.h +++ b/src/smc.h @@ -6,12 +6,12 @@ arma::vec normalize_weights(const arma::vec& log_inc_wgt); arma::vec initialize_alpha(const int& N); double get_exponent_sum(double, arma::vec, int, arma::mat, std::string); -arma::vec metropolis_hastings_rho(double, int, arma::mat, std::string, arma::vec, int); -double metropolis_hastings_alpha(double, int, arma::mat, std::string, arma::vec, const Rcpp::Nullable, double, double, double); -arma::vec get_sample_probabilities(arma::vec, double, arma::vec, std::string, int); +arma::vec metropolis_hastings_rho(double, int, arma::mat, arma::vec, std::string, int); +double metropolis_hastings_alpha(double, int, arma::mat, arma::vec, const Rcpp::Nullable, std::string, double, double, double); +arma::vec get_sample_probabilities(arma::vec, double, arma::vec, int, std::string); Rcpp::List calculate_forward_probability(arma::uvec, arma::vec, arma::vec, arma::vec, double, int, std::string); double calculate_backward_probability(arma::uvec, arma::vec, arma::vec, arma::vec, arma::vec, double, int, std::string); Rcpp::List correction_kernel(arma::vec, arma::vec, int); Rcpp::List correction_kernel_pseudo(arma::vec, arma::vec, arma::vec, double, int, std::string); -arma::vec metropolis_hastings_aug_ranking(const double&, const arma::vec&, const int&, const arma::vec&, const arma::vec&, const std::string&, const bool&); +arma::vec metropolis_hastings_aug_ranking(const double&, const arma::vec&, const int&, const arma::vec&, const arma::vec&, const bool&, const std::string&); #endif diff --git a/src/smc_calculate_probability.cpp b/src/smc_calculate_probability.cpp index 975b796a..8c90f91d 100644 --- a/src/smc_calculate_probability.cpp +++ b/src/smc_calculate_probability.cpp @@ -16,8 +16,8 @@ double smc_calculate_probability( const uword& num_items_unranked, const double& alpha, const int& n_items, - const std::string& metric, - const bool& forward + const bool& forward, + const std::string& metric = "footrule" ) { for (uword jj = 0; jj < num_items_unranked - 1; ++jj) { // items to sample rank @@ -30,7 +30,7 @@ double smc_calculate_probability( // next we get the sample probabilites for selecting a particular rank for // an item based on the current alpha and the rho rank for that item vec sample_prob_list = get_sample_probabilities( - rho_item_rank, alpha, remaining_set, metric, n_items + rho_item_rank, alpha, remaining_set, n_items, metric ); // fill in the new augmented ranking going forward @@ -80,7 +80,7 @@ double calculate_backward_probability( const arma::vec rho, const double alpha, const int n_items, - const std::string metric + const std::string metric = "footrule" ) { // given an old and new item ordering, sample ranking with new ordering and // calc the forward and backward prob @@ -108,7 +108,7 @@ double calculate_backward_probability( backward_auxiliary_ranking_probability = smc_calculate_probability( backward_auxiliary_ranking_probability, remaining_set, item_ordering, - current_ranking, rho, num_items_unranked, alpha, n_items, metric, false + current_ranking, rho, num_items_unranked, alpha, n_items, false, metric ); } return backward_auxiliary_ranking_probability; @@ -146,7 +146,7 @@ Rcpp::List calculate_forward_probability( const arma::vec rho, const double alpha, const int n_items, - const std::string metric + const std::string metric = "footrule" ) { // item ordering is the order of which items are assigned ranks in a specified // order @@ -173,7 +173,7 @@ Rcpp::List calculate_forward_probability( forward_auxiliary_ranking_probability = smc_calculate_probability( forward_auxiliary_ranking_probability, remaining_set, item_ordering, - auxiliary_ranking, rho, num_items_unranked, alpha, n_items, metric, true + auxiliary_ranking, rho, num_items_unranked, alpha, n_items, true, metric ); // last element in augmented ranking is deterministic - the prob is 1 auxiliary_ranking(num_items_unranked - 1) = as_scalar(remaining_set); diff --git a/src/smc_correction_kernel_pseudo.cpp b/src/smc_correction_kernel_pseudo.cpp index b2278e51..529dc166 100644 --- a/src/smc_correction_kernel_pseudo.cpp +++ b/src/smc_correction_kernel_pseudo.cpp @@ -29,7 +29,7 @@ Rcpp::List correction_kernel_pseudo( const arma::vec rho, const double alpha, const int n_items, - const std::string metric + const std::string metric = "footrule" ) { bool observed_equals_current = approx_equal(\ observed_ranking, current_ranking, "absdiff", 0.1\ @@ -78,7 +78,7 @@ Rcpp::List correction_kernel_pseudo( // next we get the sample probabilites for selecting a particular rank for // an item based on the current alpha and the rho rank for that item const vec sample_prob_list = get_sample_probabilities(\ - rho_item_rank, alpha, remaining_set, metric, n_items\ + rho_item_rank, alpha, remaining_set, n_items, metric\ ); // fill in the new augmented ranking going forward diff --git a/src/smc_get_exponent_sum.cpp b/src/smc_get_exponent_sum.cpp index fb9513fb..04015494 100644 --- a/src/smc_get_exponent_sum.cpp +++ b/src/smc_get_exponent_sum.cpp @@ -55,7 +55,7 @@ double get_exponent_sum( const arma::vec rho, const int n_items, arma::mat rankings, - const std::string metric + const std::string metric = "footrule" ) { /* Transpose matrices as needed ------------------------- */ if (rho.n_rows == rankings.n_cols) { diff --git a/src/smc_get_sample_probabilities.cpp b/src/smc_get_sample_probabilities.cpp index 6a89e745..2878f3be 100644 --- a/src/smc_get_sample_probabilities.cpp +++ b/src/smc_get_sample_probabilities.cpp @@ -25,8 +25,8 @@ arma::vec get_sample_probabilities( const arma::vec rho_item_rank, const double alpha, const arma::vec remaining_set_ranks, - const std::string metric, - const int n_items + const int n_items, + const std::string metric = "footrule" ) { // define a set of probs list const unsigned int& num_ranks = remaining_set_ranks.n_elem; diff --git a/src/smc_leap_and_shift_probs.cpp b/src/smc_leap_and_shift_probs.cpp index e90f9733..9c7a58e4 100644 --- a/src/smc_leap_and_shift_probs.cpp +++ b/src/smc_leap_and_shift_probs.cpp @@ -23,12 +23,12 @@ using namespace arma; //' rho <- c(1, 2, 3, 4, 5, 6) //' n_items <- 6 //' -//' leap_and_shift_probs(rho, 1, n_items) -//' leap_and_shift_probs(rho, 2, n_items) -//' leap_and_shift_probs(rho, 3, n_items) +//' leap_and_shift_probs(rho, n_items, 1) +//' leap_and_shift_probs(rho, n_items, 2) +//' leap_and_shift_probs(rho, n_items, 3) //' // [[Rcpp::export]] -Rcpp::List leap_and_shift_probs(const arma::vec rho, const int leap_size, const int n_items) { +Rcpp::List leap_and_shift_probs(const arma::vec rho, const int n_items, const int leap_size = 1) { vec rho_proposal{}; uvec indices{}; double prob_forward, prob_backward; diff --git a/src/smc_mallows_new_item_rank.cpp b/src/smc_mallows_new_item_rank.cpp index cf50ee37..2a48d012 100644 --- a/src/smc_mallows_new_item_rank.cpp +++ b/src/smc_mallows_new_item_rank.cpp @@ -14,38 +14,41 @@ void new_items_move_step( mat& alpha_samples, cube& aug_rankings, const cube& R_obs, - const std::string& metric, const std::string& aug_method, const Rcpp::Nullable& logz_estimate, const double& alpha, - const double& alpha_prop_sd, - const double& lambda, - const double& alpha_max, const uword& ttplus1, - const int& leap_size, - const bool& alpha_fixed + const bool& alpha_fixed, + const std::string& metric = "footrule", + const int& leap_size = 1, + const double& alpha_prop_sd = 0.5, + const double& alpha_max = 1e6, + const double& lambda = 0.1 ){ const uword num_ranks = R_obs.n_rows; const uword N = rho_samples.n_rows; int n_items = rho_samples.n_cols; for (uword ii = 0; ii < N; ++ii) { rho_samples.slice(ttplus1).row(ii) = metropolis_hastings_rho( - alpha_fixed ? alpha : alpha_samples(ii, ttplus1), n_items, aug_rankings.slice(ii), metric, - rho_samples.slice(ttplus1).row(ii).t(), leap_size + alpha_fixed ? alpha : alpha_samples(ii, ttplus1), n_items, aug_rankings.slice(ii), + rho_samples.slice(ttplus1).row(ii).t(), metric, leap_size ).t(); if(!alpha_fixed){ alpha_samples(ii, ttplus1) = metropolis_hastings_alpha( - alpha_samples(ii, ttplus1), n_items, aug_rankings.slice(ii), metric, + alpha_samples(ii, ttplus1), n_items, aug_rankings.slice(ii), rho_samples.slice(ttplus1).row(ii).t(), logz_estimate, - alpha_prop_sd, lambda, alpha_max); + metric, alpha_prop_sd, alpha_max, lambda + ); } for (uword jj = 0; jj < num_ranks; ++jj) { vec mh_aug_result = metropolis_hastings_aug_ranking( \ alpha_fixed ? alpha : alpha_samples(ii, ttplus1), rho_samples.slice(ttplus1).row(ii).t(),\ n_items, R_obs.slice(ttplus1).row(jj).t(), \ - aug_rankings.slice(ii).row(jj).t(), metric, - (aug_method == "pseudolikelihood") && ((metric == "footrule") || (metric == "spearman"))); + aug_rankings.slice(ii).row(jj).t(), + (aug_method == "pseudolikelihood") && ((metric == "footrule") || (metric == "spearman")), + metric + ); aug_rankings.slice(ii).row(jj) = mh_aug_result.t(); } } @@ -54,13 +57,13 @@ void new_items_move_step( arma::cube augment_rankings( const unsigned int& n_items, arma::cube& R_obs, - const std::string& metric, const unsigned int& N, arma::cube rho_samples, arma::mat alpha_samples, const double alpha = 0, const std::string& aug_method = "random", - const bool alpha_fixed = false + const bool alpha_fixed = false, + const std::string& metric = "footrule" ) { /* ====================================================== */ /* Augment Rankings */ @@ -147,8 +150,6 @@ arma::cube augment_rankings( Rcpp::List smc_mallows_new_item_rank( const unsigned int& n_items, arma::cube& R_obs, - const std::string& metric, - const int& leap_size, const unsigned int& N, const unsigned int Time, const Rcpp::Nullable logz_estimate, @@ -157,12 +158,14 @@ Rcpp::List smc_mallows_new_item_rank( Rcpp::Nullable rho_samples_init = R_NilValue, arma::vec alpha_samples_init = 0, const double alpha = 0, - const double alpha_prop_sd = 1, - const double lambda = 1, - const double alpha_max = 1, + const double alpha_prop_sd = 0.5, + const double lambda = 0.1, + const double alpha_max = 1e6, const std::string& aug_method = "random", const bool verbose = false, - const bool alpha_fixed = false + const bool alpha_fixed = false, + const std::string& metric = "footrule", + const int& leap_size = 1 ) { /* ====================================================== */ /* Initialise Phase */ @@ -207,8 +210,8 @@ Rcpp::List smc_mallows_new_item_rank( arma::cube aug_rankings_init_2 = aug_rankings_init.isNotNull() ? Rcpp::as(aug_rankings_init) : arma::cube(0,0,0); if (aug_rankings_init_2.size() == 0) { aug_rankings = augment_rankings( - n_items, R_obs, metric, N, rho_samples, alpha_samples, alpha, aug_method, - alpha_fixed + n_items, R_obs, N, rho_samples, alpha_samples, alpha, aug_method, + alpha_fixed, metric ); } else { aug_rankings = aug_rankings_init_2; @@ -309,26 +312,30 @@ Rcpp::List smc_mallows_new_item_rank( /* Move step */ /* ====================================================== */ new_items_move_step( - rho_samples, alpha_samples, aug_rankings, R_obs, metric, aug_method, - logz_estimate, alpha, alpha_prop_sd, lambda, alpha_max, tt + 1, leap_size, - alpha_fixed); + rho_samples, alpha_samples, aug_rankings, R_obs, aug_method, + logz_estimate, alpha, tt + 1, + alpha_fixed, metric, leap_size, alpha_prop_sd, alpha_max, lambda); } /* ====================================================== */ /* Post Processing */ /* ====================================================== */ if (alpha_fixed) { - return Rcpp::List::create( + Rcpp::List particle_history = Rcpp::List::create( Rcpp::Named("rho_samples") = rho_samples, Rcpp::Named("augmented_rankings") = aug_rankings, Rcpp::Named("ESS") = ESS_vec ); + particle_history.attr("class") = "SMCMallows"; + return particle_history; } else { - return Rcpp::List::create( + Rcpp::List particle_history = Rcpp::List::create( Rcpp::Named("rho_samples") = rho_samples, Rcpp::Named("alpha_samples") = alpha_samples, Rcpp::Named("augmented_rankings") = aug_rankings, Rcpp::Named("ESS") = ESS_vec ); + particle_history.attr("class") = "SMCMallows"; + return particle_history; } } diff --git a/src/smc_mallows_new_users.cpp b/src/smc_mallows_new_users.cpp index 60c66e41..70a46eb6 100644 --- a/src/smc_mallows_new_users.cpp +++ b/src/smc_mallows_new_users.cpp @@ -59,19 +59,19 @@ Rcpp::List smc_mallows_new_users( const arma::mat& R_obs, const std::string& type, const int& n_items, - const std::string& metric, - const int& leap_size, const int& N, int Time, const int& mcmc_kernel_app, const int& num_new_obs, - const double alpha_prop_sd = 1, - const double lambda = 1, - const double alpha_max = 1, + const double alpha_prop_sd = 0.5, + const double lambda = 0.1, + const double alpha_max = 1e6, const double alpha = 0, const std::string& aug_method = "random", const Rcpp::Nullable& logz_estimate = R_NilValue, - const bool verbose = false + const bool verbose = false, + const std::string& metric = "footnote", + const int& leap_size = 1 ) { /* ====================================================== */ /* Initialise Phase */ @@ -143,7 +143,7 @@ Rcpp::List smc_mallows_new_users( if(type == "partial" || type == "partial_alpha_fixed"){ smc_mallows_new_users_augment_partial( aug_rankings, aug_prob, rho_samples, alpha_samples, num_obs, num_new_obs, - R_obs, aug_method, metric, tt, alpha, type != "partial_alpha_fixed"); + R_obs, aug_method, tt, alpha, type != "partial_alpha_fixed", metric); } /* ====================================================== */ @@ -156,8 +156,8 @@ Rcpp::List smc_mallows_new_users( vec norm_wgt; smc_mallows_new_users_reweight( log_inc_wgt, ESS_vec, norm_wgt, aug_rankings, new_observed_rankings, rho_samples, - alpha, alpha_samples, tt, logz_estimate, metric, num_obs, - num_new_obs, ones(N), type != "partial_alpha_fixed", type != "complete"); + alpha, alpha_samples, tt, logz_estimate, num_obs, num_new_obs, ones(N), + type != "partial_alpha_fixed", type != "complete", metric); /* ====================================================== */ /* Resample */ @@ -180,11 +180,11 @@ Rcpp::List smc_mallows_new_users( rho_samples(span(ii), span::all, span(tt + 1)); rho_samples(span(ii), span::all, span(tt + 1)) = \ metropolis_hastings_rho( \ - as, n_items, all_observed_rankings, metric, rs.t(), leap_size\ + as, n_items, all_observed_rankings, rs.t(), metric, leap_size\ ); alpha_samples(ii, tt + 1) = metropolis_hastings_alpha( \ - as, n_items, all_observed_rankings, metric, rs.t(), logz_estimate,\ - alpha_prop_sd, lambda, alpha_max \ + as, n_items, all_observed_rankings, rs.t(), logz_estimate,\ + metric, alpha_prop_sd, alpha_max, lambda\ ); } } else if(type == "partial" || type == "partial_alpha_fixed"){ @@ -198,12 +198,12 @@ Rcpp::List smc_mallows_new_users( // the MCMC kernels rho_samples(span(ii), span::all, span(tt + 1)) = \ metropolis_hastings_rho( \ - as, n_items, all_observed_rankings, metric, rs.t(), leap_size\ + as, n_items, all_observed_rankings, rs.t(), metric, leap_size\ ); if(type == "partial"){ - alpha_samples(ii, tt + 1) = metropolis_hastings_alpha( \ - as, n_items, all_observed_rankings, metric, rs.t(), logz_estimate,\ - alpha_prop_sd, lambda, alpha_max \ + alpha_samples(ii, tt + 1) = metropolis_hastings_alpha(\ + as, n_items, all_observed_rankings, rs.t(), logz_estimate,\ + metric, alpha_prop_sd, alpha_max, lambda\ ); } for (uword jj = num_obs - num_new_obs; jj < num_obs; ++jj) { @@ -211,9 +211,9 @@ Rcpp::List smc_mallows_new_users( ar = aug_rankings(span(jj), span::all, span(ii)); vec mh_aug_result; if (aug_method == "random") { - mh_aug_result = metropolis_hastings_aug_ranking(as, rs.t(), n_items, R_obs.row(jj).t(), ar.t(), metric, false); + mh_aug_result = metropolis_hastings_aug_ranking(as, rs.t(), n_items, R_obs.row(jj).t(), ar.t(), false, metric); } else if ((aug_method == "pseudolikelihood") && ((metric == "footrule") || (metric == "spearman"))) { - mh_aug_result = metropolis_hastings_aug_ranking(as, rs.t(), n_items, R_obs.row(jj).t(), ar.t(), metric, true); + mh_aug_result = metropolis_hastings_aug_ranking(as, rs.t(), n_items, R_obs.row(jj).t(), ar.t(), true, metric); } aug_rankings(span(jj), span::all, span(ii)) = mh_aug_result; } @@ -228,6 +228,6 @@ Rcpp::List smc_mallows_new_users( Rcpp::Named("augmented_rankings") = aug_rankings, Rcpp::Named("ESS") = ESS_vec ); - if(type == "complete") particle_history.attr("class") = "SMCMallows"; // TODO: add List + particle_history.attr("class") = "SMCMallows"; // TODO: add List return particle_history; } diff --git a/src/smc_mallows_new_users.h b/src/smc_mallows_new_users.h index 51f7a9e2..f45035a9 100644 --- a/src/smc_mallows_new_users.h +++ b/src/smc_mallows_new_users.h @@ -5,13 +5,14 @@ void smc_mallows_new_users_augment_partial(arma::cube&, arma::vec&, const arma::cube&, const arma::mat&, const int&, const int&, - const arma::mat&, const std::string&, const std::string&, const int&, - const double&, const bool&); + const arma::mat&, const std::string&, const int&, const double&, + const bool&, const std::string&); void smc_mallows_new_users_reweight( arma::vec&, arma::rowvec&, arma::vec&, const arma::cube&, const arma::mat&, const arma::cube&, const double&, - const arma::mat&, const int&, const Rcpp::Nullable, const std::string&, - const int&, const int&, const arma::vec&, const bool&, const bool&); + const arma::mat&, const int&, const Rcpp::Nullable, + const int&, const int&, const arma::vec&, const bool&, const bool&, + const std::string&); void smc_mallows_new_users_resample( arma::cube&, arma::mat&, arma::cube&, const arma::vec&, const int& tt, const int& num_obs, const bool& augment_alpha, const bool& partial); diff --git a/src/smc_mallows_new_users_funs.cpp b/src/smc_mallows_new_users_funs.cpp index 4b64890d..f54c31ff 100644 --- a/src/smc_mallows_new_users_funs.cpp +++ b/src/smc_mallows_new_users_funs.cpp @@ -18,10 +18,10 @@ void smc_mallows_new_users_augment_partial( const int& num_new_obs, const arma::mat& R_obs, const std::string& aug_method, - const std::string& metric, const int& tt, const double& alpha, - const bool& augment_alpha + const bool& augment_alpha, + const std::string& metric = "footrule" ){ int N = rho_samples.n_rows; int n_items = rho_samples.n_cols; @@ -106,12 +106,12 @@ void smc_mallows_new_users_reweight( const mat& alpha_samples, const int& tt, const Rcpp::Nullable logz_estimate, - const std::string& metric, const int& num_obs, const int& num_new_obs, const vec& aug_prob, const bool& augment_alpha, - const bool& partial + const bool& partial, + const std::string& metric = "footrule" ){ int N = rho_samples.n_rows; int n_items = rho_samples.n_cols; diff --git a/src/smc_metropolis_hastings_alpha.cpp b/src/smc_metropolis_hastings_alpha.cpp index 145a48d1..d905703f 100644 --- a/src/smc_metropolis_hastings_alpha.cpp +++ b/src/smc_metropolis_hastings_alpha.cpp @@ -43,12 +43,12 @@ double metropolis_hastings_alpha( const double alpha, const int n_items, const arma::mat rankings, - const std::string metric, const arma::vec rho, const Rcpp::Nullable logz_estimate, - const double alpha_prop_sd, - const double lambda, - const double alpha_max + const std::string metric = "footrule", + const double alpha_prop_sd = 0.5, + const double alpha_max = 1e6, + const double lambda = 0.1 ) { const double rand = R::rnorm(0, 1); const double alpha_prime_log = rand * alpha_prop_sd + std::log(alpha); diff --git a/src/smc_metropolis_hastings_aug_ranking.cpp b/src/smc_metropolis_hastings_aug_ranking.cpp index 60fd21c7..bb65069a 100644 --- a/src/smc_metropolis_hastings_aug_ranking.cpp +++ b/src/smc_metropolis_hastings_aug_ranking.cpp @@ -29,8 +29,8 @@ arma::vec metropolis_hastings_aug_ranking( const int& n_items, const arma::vec& partial_ranking, const arma::vec& current_ranking, - const std::string& metric, - const bool& pseudo + const bool& pseudo, + const std::string& metric = "footnote" ) { double forward_backward_prob{}; vec proposed_ranking; diff --git a/src/smc_metropolis_hastings_rho.cpp b/src/smc_metropolis_hastings_rho.cpp index b881a4a2..741f8385 100644 --- a/src/smc_metropolis_hastings_rho.cpp +++ b/src/smc_metropolis_hastings_rho.cpp @@ -46,9 +46,9 @@ arma::vec metropolis_hastings_rho( const double alpha, const int n_items, const arma::mat rankings, - const std::string metric, const arma::vec rho, - const int leap_size + const std::string metric = "footnote", + const int leap_size = 1 ) { // create new potential consensus ranking vec rho_proposal{}; diff --git a/tests/testthat/test-bulletproofing.R b/tests/testthat/test-bulletproofing.R index e3cce501..fd0492f6 100644 --- a/tests/testthat/test-bulletproofing.R +++ b/tests/testthat/test-bulletproofing.R @@ -28,70 +28,70 @@ test_that("Functions accept rankings and rho as row or column vectors", { mhr_r_r <- metropolis_hastings_rho( alpha_init, n_items, rankings = as.matrix(cluster_rankings), - metric, rho = rho_init, + metric, leap_size ) mha_r_r <- metropolis_hastings_alpha( alpha_init, n_items, rankings = as.matrix(cluster_rankings), - metric, rho = rho_init, logz_estimate, + metric, alpha_prop_sd, - lambda, - alpha_max + alpha_max, + lambda ) mhr_rt_r <- metropolis_hastings_rho( alpha_init, n_items, rankings = t(cluster_rankings), - metric, rho = rho_init, + metric, leap_size ) mha_rt_r <- metropolis_hastings_alpha( alpha_init, n_items, rankings = t(cluster_rankings), - metric, rho = rho_init, logz_estimate, + metric, alpha_prop_sd, - lambda, - alpha_max + alpha_max, + lambda ) mhr_r_rt <- metropolis_hastings_rho( alpha_init, n_items, rankings = as.matrix(cluster_rankings), - metric, rho = t(rho_init), + metric, leap_size ) mha_r_rt <- metropolis_hastings_alpha( alpha_init, n_items, rankings = as.matrix(cluster_rankings), - metric, rho = t(rho_init), logz_estimate, + metric, alpha_prop_sd, - lambda, - alpha_max + alpha_max, + lambda ) mhr_rt_rt <- metropolis_hastings_rho( alpha_init, n_items, rankings = t(cluster_rankings), - metric, rho = t(rho_init), + metric, leap_size ) mha_rt_rt <- metropolis_hastings_alpha( alpha_init, n_items, rankings = t(cluster_rankings), - metric, rho = t(rho_init), logz_estimate, + metric, alpha_prop_sd, - lambda, - alpha_max + alpha_max, + lambda ) expect_equal(dim(mhr_r_r), c(10, 1)) expect_length(mha_r_r, 1) diff --git a/tests/testthat/test-smc_individual_functions.R b/tests/testthat/test-smc_individual_functions.R index 6d4868a2..0fe5e0e8 100644 --- a/tests/testthat/test-smc_individual_functions.R +++ b/tests/testthat/test-smc_individual_functions.R @@ -170,27 +170,26 @@ logz_estimate <- estimate_partition_function( set.seed(101) test_1_a <- metropolis_hastings_alpha_old(alpha, n_items, rankings, metric, rho, logz_estimate) test_1_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.5, - lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, alpha_prop_sd = 0.5, + lambda = 0.1, alpha_max = 20, metric ) set.seed(101) test_2_a <- metropolis_hastings_alpha_old( alpha, n_items, rankings, metric, rho, logz_estimate ) test_2_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, alpha_prop_sd = 0.15, + lambda = 0.1, alpha_max = 20, metric ) set.seed(101) test_3_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, alpha_prop_sd = 0.5, + lambda = 0.15, alpha_max = 20, metric ) set.seed(101) test_4_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, rho, logz_estimate, alpha_prop_sd = 0.15, + lambda = 0.15, alpha_max = 20, metric ) test_that("metropolis_hastings_alpha() works as expected", { diff --git a/tests/testthat/test-smc_mallows_new_item_rank.R b/tests/testthat/test-smc_mallows_new_item_rank.R index 512c4802..2da1178d 100644 --- a/tests/testthat/test-smc_mallows_new_item_rank.R +++ b/tests/testthat/test-smc_mallows_new_item_rank.R @@ -56,12 +56,13 @@ test_that("Produces the wrong metric and aug_method error", { }) test_that("smc_mallows_new_item_rank_alpha_fixed is deprecated", { - expect_warning(smc_mallows_new_item_rank_alpha_fixed( - alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, - metric = "footrule", leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, - alpha_prop_sd = alpha_prop_sd, lambda = lambda, - alpha_max = alpha_max, aug_method = "random" + expect_warning( + smc_mallows_new_item_rank_alpha_fixed( + alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, + metric = "footrule", leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, + alpha_prop_sd = alpha_prop_sd, lambda = lambda, + alpha_max = alpha_max, aug_method = "random" ), "'smc_mallows_new_item_rank_alpha_fixed' is deprecated." ) @@ -78,7 +79,7 @@ test_that("Runs with unif kernel", { alpha_max = alpha_max, aug_method = "random", alpha_fixed = TRUE ) ) - expect_is(smc_unif_alpha_fixed_unif, "list") + expect_is(smc_unif_alpha_fixed_unif, "SMCMallows") expect_length(smc_unif_alpha_fixed_unif, 3) expect_equal(dim(smc_unif_alpha_fixed_unif$rho_samples), c(N, 6, 31)) expect_equal( @@ -95,7 +96,7 @@ test_that("Runs with unif kernel", { alpha_max = alpha_max, aug_method = "random" ) ) - expect_is(smc_unif, "list") + expect_is(smc_unif, "SMCMallows") expect_length(smc_unif, 4) expect_equal(dim(smc_unif$rho_samples), c(N, 6, 31)) expect_equal(dim(smc_unif$alpha_samples), c(N, 31)) @@ -112,7 +113,7 @@ test_that("Runs with pseudo kernel", { alpha_fixed = TRUE ) ) - expect_is(smc_unif_alpha_fixed_unif, "list") + expect_is(smc_unif_alpha_fixed_unif, "SMCMallows") expect_length(smc_unif_alpha_fixed_unif, 3) expect_equal(dim(smc_unif_alpha_fixed_unif$rho_samples), c(N, 6, 31)) @@ -126,7 +127,7 @@ test_that("Runs with pseudo kernel", { alpha_max = alpha_max, aug_method = "pseudolikelihood" ) ) - expect_is(smc_unif, "list") + expect_is(smc_unif, "SMCMallows") expect_length(smc_unif, 4) expect_equal(dim(smc_unif$rho_samples), c(N, 6, 31)) expect_equal(dim(smc_unif$alpha_samples), c(N, 31)) diff --git a/tests/testthat/test-smc_mallows_partial_rankings.R b/tests/testthat/test-smc_mallows_partial_rankings.R index f56e2a89..c599e7e8 100644 --- a/tests/testthat/test-smc_mallows_partial_rankings.R +++ b/tests/testthat/test-smc_mallows_partial_rankings.R @@ -142,7 +142,7 @@ test_that("Runs with unif kernel", { ), "'smc_mallows_new_users_partial_alpha_fixed' is deprecated." ) - expect_is(smc_unif_alpha_fixed_unif, "list") + expect_is(smc_unif_alpha_fixed_unif, "SMCMallows") expect_equal(length(smc_unif_alpha_fixed_unif), 4) expect_equal(dim(smc_unif_alpha_fixed_unif$rho_samples), c(N, 10, 21)) smc_unif <- smc_mallows_new_users( @@ -179,20 +179,22 @@ test_that("Runs with unif kernel", { ), "'smc_mallows_new_users_partial' is deprecated." ) - expect_is(smc_unif, "list") + expect_is(smc_unif, "SMCMallows") expect_equal(length(smc_unif), 4) expect_equal(dim(smc_unif$rho_samples), c(N, 10, 21)) expect_equal(dim(smc_unif$alpha_samples), c(N, 21)) - expect_s3_class( + expect_s3_class(plot(smc_unif, burnin = 2), "ggplot") + expect_s3_class(plot(smc_unif, parameter = "rho"), "ggplot") + expect_warning( plot_alpha_posterior(smc_unif$alpha_samples[, Time + 1], nmc = N, burnin = 2), - "ggplot" + "'plot_alpha_posterior' is deprecated" ) - - expect_s3_class( - plot_rho_posterior(smc_unif$rho_samples[, , Time + 1], nmc = N, burnin = 2, C = 1), - "ggplot" + expect_warning( + plot_rho_posterior(smc_unif$rho_samples[, , Time + 1], nmc = N, burnin = 2), + "'plot_rho_posterior' is deprecated" ) + expect_s3_class(plot(smc_unif, parameter = "rho"), "ggplot") }) test_that("Runs with pseudo kernel", { @@ -211,7 +213,7 @@ test_that("Runs with pseudo kernel", { num_new_obs = num_new_obs, aug_method = "pseudolikelihood" ) - expect_is(smc_unif_alpha_fixed_pseudo, "list") + expect_is(smc_unif_alpha_fixed_pseudo, "SMCMallows") expect_equal(length(smc_unif_alpha_fixed_pseudo), 4) expect_equal(dim(smc_unif_alpha_fixed_pseudo$rho_samples), c(N, 10, 21)) smc_pseudo <- smc_mallows_new_users( @@ -230,7 +232,7 @@ test_that("Runs with pseudo kernel", { alpha_max = alpha_max, aug_method = "pseudolikelihood" ) - expect_is(smc_pseudo, "list") + expect_is(smc_pseudo, "SMCMallows") expect_equal(length(smc_pseudo), 4) expect_equal(dim(smc_pseudo$rho_samples), c(N, 10, 21)) expect_equal(dim(smc_pseudo$alpha_samples), c(N, 21)) diff --git a/tests/testthat/test-smc_pseudolikelihood.R b/tests/testthat/test-smc_pseudolikelihood.R index dd492622..cc7288ed 100644 --- a/tests/testthat/test-smc_pseudolikelihood.R +++ b/tests/testthat/test-smc_pseudolikelihood.R @@ -13,10 +13,10 @@ item_ordering <- c(3, 6, 4, 5) partial_ranking <- c(1, 2, NA, NA, NA, NA) remaining_set <- c(3, 4, 5, 6) test_1 <- get_sample_probabilities( - rho_item_rank = rho[3], alpha, remaining_set, metric, n_items + rho_item_rank = rho[3], alpha, remaining_set, n_items, metric ) test_2 <- get_sample_probabilities( - rho_item_rank = rho[4], alpha, remaining_set, metric, n_items + rho_item_rank = rho[4], alpha, remaining_set, n_items, metric ) test_that("get_sample_probabilities outputs as expected", { @@ -81,14 +81,14 @@ test_that("M-H aug ranking pseudo works", { R_curr <- c(1, 2, 3, 4, 5, 6) R_obs <- c(1, 2, 3, 4, 5, 6) test_1 <- metropolis_hastings_aug_ranking( - alpha, rho, n_items, R_obs, R_curr, metric, TRUE + alpha, rho, n_items, R_obs, R_curr, TRUE, metric ) expect_equal(test_1, matrix(c(1, 2, 3, 4, 5, 6))) expect_equal(all(test_1 == R_curr), TRUE) R_obs <- c(1, 2, 3, NA, NA, NA) set.seed(6220) test_2 <- metropolis_hastings_aug_ranking( - alpha, rho, n_items, R_obs, R_curr, metric, TRUE + alpha, rho, n_items, R_obs, R_curr, TRUE, metric ) expect_equal(test_2, matrix(c(1, 2, 3, 5, 6, 4))) expect_equal(all(test_2 == R_curr), FALSE) @@ -98,7 +98,7 @@ test_that("M-H aug ranking pseudo works", { test_3 <- metropolis_hastings_aug_ranking( alpha, rho, n_items, partial_ranking = R_obs, current_ranking = R_curr, - metric, TRUE + TRUE, metric ) expect_equal(test_3, matrix(c(1, 2, 4, 3, 5, 6))) expect_equal(all(test_3 == R_curr), FALSE) diff --git a/tests/testthat/test-smc_updated_new_item_rank.R b/tests/testthat/test-smc_updated_new_item_rank.R index 1593f47a..62945f60 100644 --- a/tests/testthat/test-smc_updated_new_item_rank.R +++ b/tests/testthat/test-smc_updated_new_item_rank.R @@ -72,7 +72,7 @@ smc_test_partial_unif1 <- smc_mallows_new_item_rank( alpha_fixed = TRUE ) test_that("Updated item rank output is OK", { - expect_is(smc_test_partial_unif1, "list") + expect_is(smc_test_partial_unif1, "SMCMallows") expect_length(smc_test_partial_unif1, 3) expect_equal(dim(smc_test_partial_unif1$rho_samples), c(N, n_items, 6)) expect_length(smc_test_partial_unif1$ESS, Time2) @@ -91,7 +91,7 @@ smc_test_partial_unif2 <- smc_mallows_new_item_rank( aug_rankings_init = smc_test_new_user_unif$augmented_rankings ) test_that("Updated item rank output (alpha variable) is OK", { - expect_is(smc_test_partial_unif2, "list") + expect_is(smc_test_partial_unif2, "SMCMallows") expect_length(smc_test_partial_unif2, 4) expect_equal(dim(smc_test_partial_unif2$rho_samples), c(N, n_items, 6)) expect_length(smc_test_partial_unif2$ESS, Time2) @@ -120,7 +120,7 @@ smc_test_partial_pseudo1 <- smc_mallows_new_item_rank( alpha_fixed = TRUE ) test_that("Updated item rank output is OK", { - expect_is(smc_test_partial_pseudo1, "list") + expect_is(smc_test_partial_pseudo1, "SMCMallows") expect_length(smc_test_partial_pseudo1, 3) expect_equal(dim(smc_test_partial_pseudo1$rho_samples), c(N, n_items, 6)) expect_length(smc_test_partial_pseudo1$ESS, Time2) @@ -138,7 +138,7 @@ smc_test_partial_pseudo2 <- smc_mallows_new_item_rank( aug_rankings_init = smc_test_new_user_unif$augmented_rankings ) test_that("Updated item rank output (variable alpha) is OK", { - expect_is(smc_test_partial_pseudo2, "list") + expect_is(smc_test_partial_pseudo2, "SMCMallows") expect_length(smc_test_partial_pseudo2, 4) expect_equal(dim(smc_test_partial_pseudo2$rho_samples), c(N, n_items, 6)) expect_length(smc_test_partial_pseudo2$ESS, Time2) diff --git a/vignettes/SMC-Mallows.Rmd b/vignettes/SMC-Mallows.Rmd index b6d75587..a0606f7b 100644 --- a/vignettes/SMC-Mallows.Rmd +++ b/vignettes/SMC-Mallows.Rmd @@ -37,15 +37,12 @@ A list is provided with the most commonly used functions in this extension of th |:-----------------------------|:--------------------------------------------| | `smc_mallows_new_users` | Runs the SMC algorithm for case where we observe full rankings as new observational data. | | `smc_mallows_new_item_rank` | Runs the SMC algorithm for case where we observe updated partial rankings as from existing users. | -| `plot_rho_posterior`| Plots posterior density of $\boldsymbol{\rho}$ for a selection of items. | +| `plot`| Plots posterior density of $\boldsymbol{\rho}$ or $\alpha$ for a selection of items. | | `compute_posterior_intervals_rho` | Computes the Bayesian posterior intervals for $\boldsymbol{\rho}$.| | `compute_consensus_rho`| Computes the CP estimate or MAP estimate of the latent ranks. | -| `plot_alpha_posterior` | Plots the posterior density of $\alpha$.| | `compute_posterior_intervals_alpha` | Computes the Bayesian posterior intervals for $\alpha$.| - - ## Introduction We provide a summary on the Bayesian Mallows model and the proposed Sequential Monte Carlo framework which updates the parameter estimates of the posterior each time we receive new observations for a fixed computational cost. More information on the Bayesian Mallows model can found in @vitelli2018 and @liu2019, and a vignette on the `BayesMallows` R package can be found in @sorensen2020. A general discussion on SMC can be found in @del2006sequential and @doucet2009tutorial. @@ -152,15 +149,10 @@ smc_test <- smc_mallows_new_users( The example `smc_test` returns a list variable of the particles values for $\boldsymbol{\rho}$ and $\alpha$ at each time step, so we can observe the posterior as it evolves. Specifically the list contains a three dimensional matrix of size `N` by `n_items` by `Time+1`, named `rho_samples`, and an `N` by `Time+1` matrix called `alpha_samples`. These matrices can be studied using some post-processing functions for visualising and analysing the posterior. It is also possible to make comparative results using the MCMC algorithm and the SMC algorithm. Unlike MCMC we cannot observe the trace plots and we do not need to specify a non-zero value for the burn-in. The indexing in R means that we have to view the `Time+1` slice of the output in order to view the posterior once all 100 rankings have been observed. -Here, we can observe the posterior probabilities of $\boldsymbol{\rho}$ for a selection of items can be observed by calling the function `plot_rho_posterior`. +Here, we can observe the posterior probabilities of $\boldsymbol{\rho}$ for a selection of items can be observed by calling the function `plot()`. ```{r smc_complete_analysis_heatplot, message=FALSE, warning=FALSE} -test_sample_rho <- smc_test$rho_samples[, , Time + 1] -plot_rho_posterior( - output = test_sample_rho, - nmc = N, burnin = 0, C = 1, - colnames = colnames(sushi_rankings) -) +plot(smc_test, colnames = colnames(sushi_rankings), parameter = "rho") ``` The posterior distributions of $\boldsymbol{\rho}$ and $\alpha$ can be studied with some of the the analysis tools provided by the extension. The posterior intervals for the consensus ranking of each sushi item are obtained by calling `compute_posterior_intervals_rho`. @@ -184,14 +176,11 @@ compute_rho_consensus( ) ``` -Similarly, we can observe the posterior density and the posterior intervals for the scale parameter using the functions `plot_alpha_posterior` and `compute_posterior_intervals_alpha`. +Similarly, we can observe the posterior density and the posterior intervals for the scale parameter using the functions `plot()` and `compute_posterior_intervals_alpha`. ```{r smc_complete_alpha_analysis, message=FALSE, warning=FALSE} +plot(smc_test, nmc = N, burnin = 0, parameter = "alpha") test_sample_alpha <- smc_test$alpha_samples[, Time + 1] -plot_alpha_posterior( - output = test_sample_alpha, nmc = N, - burnin = 0 -) compute_posterior_intervals_alpha( output = test_sample_alpha, nmc = N, burnin = 0, verbose = FALSE @@ -248,7 +237,7 @@ smc_partial_test <- smc_mallows_new_users( alpha_max = 1e6, aug_method = aug_method ) -#>Error in smc_mallows_new_users(R_obs = data_partial, type = "partial", +#>Error in smc_mallows_new_users(R_obs = data_partial, type = "partial", #>: Combined choice of metric and aug_method is incompatible ``` @@ -284,16 +273,8 @@ The variable `smc_test_partial` contains a list with three dimensional matrix of ```{r smc_partial_analysis, message=FALSE, warning=FALSE} -partial_test_sample_rho <- smc_partial_test$rho_samples[, , Time + 1] -partial_test_sample_alpha <- smc_partial_test$alpha_samples[, Time + 1] -plot_rho_posterior( - output = partial_test_sample_rho, nmc = N, - burnin = 0, C = 1, colnames = colnames(sushi_rankings) -) -plot_alpha_posterior( - output = partial_test_sample_alpha, nmc = N, - burnin = 0 -) +plot(smc_partial_test, colnames = colnames(sushi_rankings), parameter = "rho") +plot(smc_partial_test, nmc = N, burnin = 0, parameter = "alpha") ``` @@ -367,13 +348,8 @@ smc_test_updated_partial <- smc_mallows_new_item_rank( Unlike the two previous demonstrations, the final slice of the output occurs at `Time` instead of `Time+1`. This is because we initialised the algorithm with the first slice of the `test_dataset` rather than initialising with no observed data. We observe the posterior probabilities for items in $\boldsymbol{\rho}$ and posterior density for $\alpha$ by using the same post-processing functions as before. ```{r smc_updated_partial_analysis, message=FALSE, warning=FALSE} -updated_partial_test_sample_rho <- smc_test_updated_partial$rho_samples[, , Time] -updated_partial_test_sample_alpha <- smc_test_updated_partial$alpha_samples[, Time] -plot_rho_posterior(output = updated_partial_test_sample_rho, nmc = N, burnin = 0, C = 1) -plot_alpha_posterior(output = updated_partial_test_sample_alpha, nmc = N, burnin = 0) +plot(smc_test_updated_partial, parameter = "rho") +plot(smc_test_updated_partial, parameter = "alpha") ``` - - - ## References