Develop (#271)

* Added `plot.SMCMallows()` method + example (#114) * Changed class of SMC outputs to SMCMallows (#114) This makes proper dispatching of possible, withou having any other apparent consequences. * Adapted SMC vignette and unit tests to new `plot()` method (#114) * Fixed CodeFactor issue Redundant blank line at the end of a code block should be deleted. * Gathererd SMC plot functions on same file (#114) This should help with future DRYing, perhaps both subfunctions should eventually be internalized? * Increment version number to 1.2.0.9004 * Updated NEWS.md * Updated build CI config file Using template from https://github.com/r-lib/actions/tree/v2-branch/examples#standard-ci-workflow * Added `plot.SMCMallows()` method (#263) * Added `plot.SMCMallows()` method + example (#114) * Changed class of SMC outputs to SMCMallows (#114) This makes proper dispatching of possible, withou having any other apparent consequences. * Adapted SMC vignette and unit tests to new `plot()` method (#114) * Fixed CodeFactor issue Redundant blank line at the end of a code block should be deleted. * Gathererd SMC plot functions on same file (#114) This should help with future DRYing, perhaps both subfunctions should eventually be internalized? * Increment version number to 1.2.0.9004 * Updated NEWS.md * Updated docs * Removed duplicated function As mentioned in the in-code comment, `scalefun()` was already defined in a different source file. * Increment version number to 1.2.1.9001 Co-authored-by: Øystein Sørensen <oystein_sorensen@hotmail.com> * Fix documentation for `plot.SMCMallows()` (#266) * Fixed links to other package functions Links were formatted wrongly assuming the package was setup to support markdown-formatted links. This fixes it. * Increment version number to 1.2.1.9002 * Reverting changes to the `compute_mallows()` docs * Reverting changes to NEWS.md Textual change is automatically introduced by `usethis::use_dev_version()`. * Deprecated `plot_*_posterior()` (#267) Following the conversation started [here](#263 (comment)), this commit moves the superseded subfunctions of `plot.SMCMallows()` into the `smc_mallows_deprecated.R` file for eventual removal. The subfunctions themselves were renamed and test units to test the deprecation warnings were written. * Matching SMC defaults to their MCMC counterparts (#269) * Updated documentation Some text were taken from the original implementation and make no longer sense. * Matched SMC defaults with their original counterparts (#114) * SMC metric defaults to footrule (#114) * SMC leap_size defaults to 1 (#114) * SMC alpha_prop_sd defaults to 0.5 (#114) * SMC alpha_max defaults to 1e6 (#114) * SMC lambda defautls to 0.1 (#114) * Update DESCRIPTION Incremented development version. Co-authored-by: Øystein Sørensen <oystein_sorensen@hotmail.com> * Added changes from PR #269 to NEWS.md Those are important to mention, since they technically break backwards compatibility. I forgot to do that on he PR itself, thankfully there's a second chance. :) Co-authored-by: Waldir Leoncio <w.l.netto@medisin.uio.no>
ocbe-uio · Jan 5, 2023 · bf09d5b · bf09d5b
1 parent f2bd76a
commit bf09d5b
Show file tree

Hide file tree

Showing 48 changed files with 573 additions and 517 deletions.
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -7,71 +7,31 @@ name: R-CMD-check
 jobs:
   R-CMD-check:
     runs-on: ${{ matrix.config.os }}
-
     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
-
     strategy:
       fail-fast: false
       matrix:
         config:
           - {os: windows-latest, r: 'release'}
           - {os: macOS-latest, r: 'release'}
-          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
-          - {os: ubuntu-20.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
-
+          - {os: ubuntu-20.04,   r: 'release'}
+          - {os: ubuntu-20.04,   r: 'devel', http-user-agent: 'release'}
+          - {os: ubuntu-20.04,   r: 'oldrel-1'}
     env:
-      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
-      RSPM: ${{ matrix.config.rspm }}
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
-
+      R_KEEP_PKG_SOURCE: yes
     steps:
-      - uses: actions/checkout@v2
-
-      - uses: r-lib/actions/setup-r@v1
+      - uses: actions/checkout@v3
+      - uses: r-lib/actions/setup-pandoc@v2
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
-
-      - uses: r-lib/actions/setup-pandoc@v1
-
-      - name: Query dependencies
-        run: |
-          install.packages('remotes')
-          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
-          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
-        shell: Rscript {0}
-
-      - name: Restore R package cache
-        uses: actions/cache@v2
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
+      - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          path: ${{ env.R_LIBS_USER }}
-          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
-
-      - name: Install system dependencies
-        if: runner.os == 'Linux'
-        run: |
-          while read -r cmd
-          do
-            eval sudo $cmd
-          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
-
-      - name: Install dependencies
-        run: |
-          remotes::install_deps(dependencies = TRUE)
-          remotes::install_cran("rcmdcheck")
-        shell: Rscript {0}
-
-      - name: Check
-        env:
-          _R_CHECK_CRAN_INCOMING_REMOTE_: false
-        run: |
-          options(crayon.enabled = TRUE)
-          rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
-        shell: Rscript {0}
-
-      - name: Upload check results
-        if: failure()
-        uses: actions/upload-artifact@main
+          extra-packages: any::rcmdcheck
+          needs: check
+      - uses: r-lib/actions/check-r-package@v2
         with:
-          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
-          path: check
+          upload-snapshots: true
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: BayesMallows
 Type: Package
 Title: Bayesian Preference Learning with the Mallows Rank Model
-Version: 1.2.1
+Version: 1.2.1.9003
 Authors@R: c(person("Oystein", "Sorensen",
                     email = "oystein.sorensen.1985@gmail.com",
                     role = c("aut", "cre"),
@@ -42,7 +42,7 @@ URL: https://github.com/ocbe-uio/BayesMallows
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
 Depends: R (>= 3.5.0)
 Imports: Rcpp (>= 1.0.0),
   ggplot2 (>= 3.1.0),

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,7 @@ S3method(compute_consensus,BayesMallows)
 S3method(compute_posterior_intervals,BayesMallows)
 S3method(compute_posterior_intervals,SMCMallows)
 S3method(plot,BayesMallows)
+S3method(plot,SMCMallows)
 S3method(print,BayesMallows)
 S3method(print,BayesMallowsMixtures)
 export(assess_convergence)

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,12 @@
+# BayesMallows (development versions)
+
+* Added `plot.SMCMallows()` method
+* Changed default values and argument order on several SMC functions (see PR #269)
+
 # BayesMallows 1.2.1
 
 * PerMallows package has been removed from Imports because it is at risk of
-  being removed from CRAN. This means that for Ulam distance with more than 
+  being removed from CRAN. This means that for Ulam distance with more than
   95 items, the user will have to compute an importance sampling estimate.
 * Refactoring of data augmentation function for SMC Mallows.
 * Improved documentation of `sample_dataset`

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -196,7 +196,7 @@ run_mcmc <- function(rankings, obs_freq, nmc, constraints, cardinalities, logz_e
 #' @return backward_auxiliary_ranking_probability A numerical value of creating the previous augmented ranking using the same item ordering used to create the
 #' new augmented ranking in calculate_forward_probability function.
 #' @export
-calculate_backward_probability <- function(item_ordering, partial_ranking, current_ranking, remaining_set, rho, alpha, n_items, metric) {
+calculate_backward_probability <- function(item_ordering, partial_ranking, current_ranking, remaining_set, rho, alpha, n_items, metric = "footrule") {
     .Call(`_BayesMallows_calculate_backward_probability`, item_ordering, partial_ranking, current_ranking, remaining_set, rho, alpha, n_items, metric)
 }
 
@@ -224,7 +224,7 @@ calculate_backward_probability <- function(item_ordering, partial_ranking, curre
 #'   proposed augmented ranking and forward_prob a numerical value of the
 #'   probability of creating the augmented ranking using the pseudolikelihood
 #'   augmentation.
-calculate_forward_probability <- function(item_ordering, partial_ranking, remaining_set, rho, alpha, n_items, metric) {
+calculate_forward_probability <- function(item_ordering, partial_ranking, remaining_set, rho, alpha, n_items, metric = "footrule") {
     .Call(`_BayesMallows_calculate_forward_probability`, item_ordering, partial_ranking, remaining_set, rho, alpha, n_items, metric)
 }
 
@@ -262,7 +262,7 @@ correction_kernel <- function(observed_ranking, current_ranking, n_items) {
 #'   \code{"ulam"}.
 #' @return list containing R_obs, the proposed 'corrected' augmented ranking that is compatible with the new observed ranking for a user, and
 #'         forward_auxiliary_ranking_probability, a numerical value for the probability of correcting the ranking to be compatible with R_obs.
-correction_kernel_pseudo <- function(current_ranking, observed_ranking, rho, alpha, n_items, metric) {
+correction_kernel_pseudo <- function(current_ranking, observed_ranking, rho, alpha, n_items, metric = "footrule") {
     .Call(`_BayesMallows_correction_kernel_pseudo`, current_ranking, observed_ranking, rho, alpha, n_items, metric)
 }
 
@@ -311,7 +311,7 @@ correction_kernel_pseudo <- function(current_ranking, observed_ranking, rho, alp
 #'   alpha = alpha, rho = rho,  n_items = n_items, rankings = rankings ,
 #'   metric = metric
 #' )
-get_exponent_sum <- function(alpha, rho, n_items, rankings, metric) {
+get_exponent_sum <- function(alpha, rho, n_items, rankings, metric = "footrule") {
     .Call(`_BayesMallows_get_exponent_sum`, alpha, rho, n_items, rankings, metric)
 }
 
@@ -330,8 +330,8 @@ get_exponent_sum <- function(alpha, rho, n_items, rankings, metric) {
 #' @return sample_prob_list A numeric sequence of sample probabilities for selecting a specific rank given the current
 #'         rho_item_rank
 #' @export
-get_sample_probabilities <- function(rho_item_rank, alpha, remaining_set_ranks, metric, n_items) {
-    .Call(`_BayesMallows_get_sample_probabilities`, rho_item_rank, alpha, remaining_set_ranks, metric, n_items)
+get_sample_probabilities <- function(rho_item_rank, alpha, remaining_set_ranks, n_items, metric = "footrule") {
+    .Call(`_BayesMallows_get_sample_probabilities`, rho_item_rank, alpha, remaining_set_ranks, n_items, metric)
 }
 
 #' @title Leap and Shift Probabilities
@@ -353,12 +353,12 @@ get_sample_probabilities <- function(rho_item_rank, alpha, remaining_set_ranks,
 #' rho <- c(1, 2, 3, 4, 5, 6)
 #' n_items <- 6
 #'
-#' leap_and_shift_probs(rho, 1, n_items)
-#' leap_and_shift_probs(rho, 2, n_items)
-#' leap_and_shift_probs(rho, 3, n_items)
+#' leap_and_shift_probs(rho, n_items, 1)
+#' leap_and_shift_probs(rho, n_items, 2)
+#' leap_and_shift_probs(rho, n_items, 3)
 #'
-leap_and_shift_probs <- function(rho, leap_size, n_items) {
-    .Call(`_BayesMallows_leap_and_shift_probs`, rho, leap_size, n_items)
+leap_and_shift_probs <- function(rho, n_items, leap_size = 1L) {
+    .Call(`_BayesMallows_leap_and_shift_probs`, rho, n_items, leap_size)
 }
 
 #' @title SMC-Mallows new users rank
@@ -389,8 +389,8 @@ leap_and_shift_probs <- function(rho, leap_size, n_items) {
 #' @param alpha numeric value of the scale parameter.
 #' @return a 3d matrix containing: the samples of: rho, alpha and the augmented rankings, and the effective sample size at each iteration of the SMC algorithm.
 #' @export
-smc_mallows_new_item_rank <- function(n_items, R_obs, metric, leap_size, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init = NULL, rho_samples_init = NULL, alpha_samples_init = 0L, alpha = 0, alpha_prop_sd = 1, lambda = 1, alpha_max = 1, aug_method = "random", verbose = FALSE, alpha_fixed = FALSE) {
-    .Call(`_BayesMallows_smc_mallows_new_item_rank`, n_items, R_obs, metric, leap_size, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init, rho_samples_init, alpha_samples_init, alpha, alpha_prop_sd, lambda, alpha_max, aug_method, verbose, alpha_fixed)
+smc_mallows_new_item_rank <- function(n_items, R_obs, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init = NULL, rho_samples_init = NULL, alpha_samples_init = 0L, alpha = 0, alpha_prop_sd = 0.5, lambda = 0.1, alpha_max = 1e6, aug_method = "random", verbose = FALSE, alpha_fixed = FALSE, metric = "footrule", leap_size = 1L) {
+    .Call(`_BayesMallows_smc_mallows_new_item_rank`, n_items, R_obs, N, Time, logz_estimate, mcmc_kernel_app, aug_rankings_init, rho_samples_init, alpha_samples_init, alpha, alpha_prop_sd, lambda, alpha_max, aug_method, verbose, alpha_fixed, metric, leap_size)
 }
 
 #' @title SMC-Mallows New Users
@@ -438,8 +438,8 @@ smc_mallows_new_item_rank <- function(n_items, R_obs, metric, leap_size, N, Time
 #'
 #' @example inst/examples/smc_mallows_new_users_complete_example.R
 #'
-smc_mallows_new_users <- function(R_obs, type, n_items, metric, leap_size, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd = 1, lambda = 1, alpha_max = 1, alpha = 0, aug_method = "random", logz_estimate = NULL, verbose = FALSE) {
-    .Call(`_BayesMallows_smc_mallows_new_users`, R_obs, type, n_items, metric, leap_size, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, alpha, aug_method, logz_estimate, verbose)
+smc_mallows_new_users <- function(R_obs, type, n_items, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd = 0.5, lambda = 0.1, alpha_max = 1e6, alpha = 0, aug_method = "random", logz_estimate = NULL, verbose = FALSE, metric = "footnote", leap_size = 1L) {
+    .Call(`_BayesMallows_smc_mallows_new_users`, R_obs, type, n_items, N, Time, mcmc_kernel_app, num_new_obs, alpha_prop_sd, lambda, alpha_max, alpha, aug_method, logz_estimate, verbose, metric, leap_size)
 }
 
 #' @title Metropolis-Hastings Alpha
@@ -475,8 +475,8 @@ smc_mallows_new_users <- function(R_obs, type, n_items, metric, leap_size, N, Ti
 #' @example /inst/examples/metropolis_hastings_alpha_example.R
 #'
 #' @export
-metropolis_hastings_alpha <- function(alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd, lambda, alpha_max) {
-    .Call(`_BayesMallows_metropolis_hastings_alpha`, alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd, lambda, alpha_max)
+metropolis_hastings_alpha <- function(alpha, n_items, rankings, rho, logz_estimate, metric = "footrule", alpha_prop_sd = 0.5, alpha_max = 1e6, lambda = 0.1) {
+    .Call(`_BayesMallows_metropolis_hastings_alpha`, alpha, n_items, rankings, rho, logz_estimate, metric, alpha_prop_sd, alpha_max, lambda)
 }
 
 #' @title Metropolis-Hastings Augmented Ranking
@@ -495,8 +495,8 @@ metropolis_hastings_alpha <- function(alpha, n_items, rankings, metric, rho, log
 #' @return R_curr or R_obs A ranking sequence vector representing proposed augmented ranking for next iteration of MCMC chain
 #' @export
 #' @keywords internal
-metropolis_hastings_aug_ranking <- function(alpha, rho, n_items, partial_ranking, current_ranking, metric, pseudo) {
-    .Call(`_BayesMallows_metropolis_hastings_aug_ranking`, alpha, rho, n_items, partial_ranking, current_ranking, metric, pseudo)
+metropolis_hastings_aug_ranking <- function(alpha, rho, n_items, partial_ranking, current_ranking, pseudo, metric = "footnote") {
+    .Call(`_BayesMallows_metropolis_hastings_aug_ranking`, alpha, rho, n_items, partial_ranking, current_ranking, pseudo, metric)
 }
 
 #' @title Metropolis-Hastings Rho
@@ -535,7 +535,7 @@ metropolis_hastings_aug_ranking <- function(alpha, rho, n_items, partial_ranking
 #' 	rho = rho, leap_size = 1
 #' )
 #'
-metropolis_hastings_rho <- function(alpha, n_items, rankings, metric, rho, leap_size) {
-    .Call(`_BayesMallows_metropolis_hastings_rho`, alpha, n_items, rankings, metric, rho, leap_size)
+metropolis_hastings_rho <- function(alpha, n_items, rankings, rho, metric = "footnote", leap_size = 1L) {
+    .Call(`_BayesMallows_metropolis_hastings_rho`, alpha, n_items, rankings, rho, metric, leap_size)
 }
 
diff --git a/R/plot.SMCMallows.R b/R/plot.SMCMallows.R
@@ -0,0 +1,103 @@
+#' @title Plot SMC Posterior Distributions
+#' @description Plot posterior distributions of SMC-Mallow parameters.
+#' @param x An object of type \code{SMC-Mallows}, returned for example from
+#' \code{\link{smc_mallows_new_users}}.
+#' @param nmc Number of Monte Carlo samples
+#' @param burnin A numeric value specifying the number of iterations
+#' to discard as burn-in. Defaults to \code{model_fit$burnin}, and must be
+#' provided if \code{model_fit$burnin} does not exist. See
+#' \code{\link{assess_convergence}}.
+#' @param parameter Character string defining the parameter to plot. Available
+#' options are \code{"alpha"} and \code{"rho"}.
+#' @param time Integer determining the update slice to plot
+#' @param C Number of cluster
+#' @param colnames A vector of item names. If NULL, generic names are generated
+#' for the items in the ranking.
+#' @param items Either a vector of item names, or a vector of indices. If NULL,
+#' five items are selected randomly.
+#' @param ... Other arguments passed to \code{\link[base]{plot}} (not used).
+#' @return A plot of the posterior distributions
+#' @author Waldir Leoncio
+#' @export
+#' @example /inst/examples/plot.SMCMallows_example.R
+plot.SMCMallows <- function(x, nmc = nrow(x$rho_samples[, 1, ]), burnin = 0,
+  parameter = "alpha", time = ncol(x$rho_samples[, 1, ]), C = 1,
+  colnames = NULL, items = NULL, ...) {
+
+  if (parameter == "alpha") {
+    output <- x$alpha_samples[, time]
+    plot_alpha_smc(output, nmc, burnin)
+  } else if (parameter == "rho") {
+    output <- x$rho_samples[, , time]
+    plot_rho_smc(output, nmc, burnin, C, colnames, items)
+  } else {
+    stop("parameter must be either 'alpha' or 'rho'.")
+  }
+}
+
+plot_alpha_smc <- function(output, nmc, burnin) {
+  alpha_samples_table <- data.frame(iteration = 1:nmc, value = output)
+
+  plot_posterior_alpha <- ggplot2::ggplot(alpha_samples_table, ggplot2::aes_(x = ~value)) +
+    ggplot2::geom_density() +
+    ggplot2::xlab(expression(alpha)) +
+    ggplot2::ylab("Posterior density") +
+    ggplot2::ggtitle(label = "Implemented SMC scheme") +
+    ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
+
+  print(plot_posterior_alpha)
+}
+
+plot_rho_smc <- function(output, nmc, burnin, C, colnames = NULL, items = NULL) {
+  n_items <- dim(output)[2]
+
+  if (is.null(items) && n_items > 5) {
+    message("Items not provided by user or more than 5 items in a ranking. Picking 5 at random.")
+    items <- sample(seq_len(n_items), 5, replace = FALSE)
+    items <- sort(items)
+  } else if (is.null(items) && n_items <= 5) {
+    items <- seq_len(n_items)
+    items <- sort(items)
+  }
+
+  # do smc processing here
+  smc_plot <- smc_processing(output = output, colnames = colnames)
+
+  if (!is.character(items)) {
+    items <- unique(smc_plot$item)[items]
+  }
+
+  iteration <- rep(seq_len(nmc), times = n_items)
+  df <- cbind(iteration, smc_plot)
+
+  if (C == 1) {
+    df <- cbind(cluster = "Cluster 1", df)
+  }
+
+  df <- df[df$iteration > burnin & df$item %in% items, , drop = FALSE]
+
+  # Compute the density, rather than the count, since the latter
+  # depends on the number of Monte Carlo samples
+  df <- aggregate(list(n = df$iteration),
+    list(cluster = df$cluster, item = df$item, value = df$value),
+    FUN = length
+  )
+  df$pct <- df$n / sum(df$n)
+
+  df$item <- factor(df$item, levels = c(items))
+
+  # Finally create the plot
+  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data$value, y = .data$pct)) +
+    ggplot2::geom_col() +
+    ggplot2::scale_x_continuous(labels = scalefun) +
+    ggplot2::xlab("rank") +
+    ggplot2::ylab("Posterior probability")
+
+  if (C == 1) {
+    p <- p + ggplot2::facet_wrap(~ .data$item)
+  } else {
+    p <- p + ggplot2::facet_wrap(~ .data$cluster + .data$item)
+  }
+
+  return(p)
+}