diff --git a/DESCRIPTION b/DESCRIPTION index 17d2d990..c6af5450 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesMallows Type: Package Title: Bayesian Preference Learning with the Mallows Rank Model -Version: 1.2.0.9002 +Version: 1.2.1 Authors@R: c(person("Oystein", "Sorensen", email = "oystein.sorensen.1985@gmail.com", role = c("aut", "cre"), @@ -51,7 +51,6 @@ Imports: Rcpp (>= 1.0.0), sets (>= 1.0-18), relations (>= 0.6-8), rlang (>= 0.3.1), - PerMallows (>= 1.13), HDInterval (>= 0.2.0), cowplot (>= 1.0.0) LinkingTo: Rcpp, RcppArmadillo, testthat diff --git a/NEWS.md b/NEWS.md index 38daf0e7..3f3e4aa1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ -# BayesMallows (development version) +# BayesMallows 1.2.1 +* PerMallows package has been removed from Imports because it is at risk of + being removed from CRAN. This means that for Ulam distance with more than + 95 items, the user will have to compute an importance sampling estimate. * Refactoring of data augmentation function for SMC Mallows. * Improved documentation of `sample_dataset` diff --git a/R/misc.R b/R/misc.R index ef33a376..79859e55 100644 --- a/R/misc.R +++ b/R/misc.R @@ -55,12 +55,7 @@ prepare_partition_function <- function(logz_estimate, metric, n_items) { # Fourth, is it the Ulam distance? if (metric == "ulam") { - return(list( - cardinalities = unlist(lapply( - 0:(n_items - 1), - function(x) PerMallows::count.perms(perm.length = n_items, dist.value = x, dist.name = "ulam") - )) - )) + message("Exact partition function no longer available for Ulam distance with >95 items.") } # Fifth, can we compute the partition function in our C++ code? diff --git a/cran-comments.md b/cran-comments.md index cb30a933..c7d75aa8 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,10 +1,10 @@ ## Resubmission Note -This is a resubmission. It contains some bugfixes, refactoring of C++ code, and -(soft-)deprecation of some functions to make the API cleaners. +This is a resubmission. It removes the dependency on the PerMallows package, due to an +e-mail from Dr. Hornik warning that PerMallows is scheduled to be archived from CRAN. ## Test Environments -* local Windows install, R 4.2.0. +* local Windows install, R 4.2.1. * windows, devel, release and old-release. * Apple Silicon (M1) via rhub. * valgrind and GCC-UBSAN via rhub. diff --git a/inst/examples/sample_mallows_example.R b/inst/examples/sample_mallows_example.R index 2654a31b..e9175c8e 100644 --- a/inst/examples/sample_mallows_example.R +++ b/inst/examples/sample_mallows_example.R @@ -31,18 +31,3 @@ model_fit <- compute_mallows(samples, nmc = 10000) # The highest posterior density interval covers alpha0 = 10. compute_posterior_intervals(model_fit, burnin = 2000, parameter = "alpha") -# The PerMallows package has a Gibbs sampler for sampling from the Mallows -# distribution with Cayley, Kendall, Hamming, and Ulam distances. For these -# distances, using the PerMallows package is typically faster. - -# Let us sample 100 rankings from the Mallows model with Cayley distance, -# with the same consensus ranking and scale parameter as above. -library(PerMallows) -# Set the scale parameter of the PerMallows package corresponding to -# alpha0 in BayesMallows -theta0 <- alpha0 / n_items -# Sample with PerMallows::rmm -sample1 <- rmm(n = 100, sigma0 = rho0, theta = theta0, dist.name = "cayley") -# Generate the same sample with sample_mallows -sample2 <- sample_mallows(rho0 = rho0, alpha0 = alpha0, n_samples = 100, - burnin = 1000, thinning = 1000, metric = "cayley") diff --git a/man/sample_mallows.Rd b/man/sample_mallows.Rd index 939f70aa..2c5dae09 100644 --- a/man/sample_mallows.Rd +++ b/man/sample_mallows.Rd @@ -97,21 +97,6 @@ model_fit <- compute_mallows(samples, nmc = 10000) # The highest posterior density interval covers alpha0 = 10. compute_posterior_intervals(model_fit, burnin = 2000, parameter = "alpha") -# The PerMallows package has a Gibbs sampler for sampling from the Mallows -# distribution with Cayley, Kendall, Hamming, and Ulam distances. For these -# distances, using the PerMallows package is typically faster. - -# Let us sample 100 rankings from the Mallows model with Cayley distance, -# with the same consensus ranking and scale parameter as above. -library(PerMallows) -# Set the scale parameter of the PerMallows package corresponding to -# alpha0 in BayesMallows -theta0 <- alpha0 / n_items -# Sample with PerMallows::rmm -sample1 <- rmm(n = 100, sigma0 = rho0, theta = theta0, dist.name = "cayley") -# Generate the same sample with sample_mallows -sample2 <- sample_mallows(rho0 = rho0, alpha0 = alpha0, n_samples = 100, - burnin = 1000, thinning = 1000, metric = "cayley") } \references{ \insertAllCited{} diff --git a/tests/testthat/test-distance_function.R b/tests/testthat/test-distance_function.R index 78a83cad..ab33a8c9 100644 --- a/tests/testthat/test-distance_function.R +++ b/tests/testthat/test-distance_function.R @@ -33,14 +33,20 @@ test_that("Spearman distance is correct", { # Loop over some n values test_that("Kendall distance is correct", { - for (n in c(2, 3, 5)) { + ns <- c(2, 3, 5) + correct <- list(c(0, 1), c(0, 1, 1, 2, 2, 3), + c(0, 1, 1, 2, 2, 3, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 3, 4, + 4, 5, 5, 6, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 3, 4, 4, 5, 5, + 6, 4, 5, 5, 6, 6, 7, 2, 3, 3, 4, 4, 5, 3, 4, 4, 5, 5, 6, 4, 5, + 5, 6, 6, 7, 5, 6, 6, 7, 7, 8, 3, 4, 4, 5, 5, 6, 4, 5, 5, 6, 6, + 7, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 4, 5, 5, 6, 6, 7, 5, 6, + 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 7, 8, 8, 9, 9, 10)) + for (i in seq_along(ns)) { expect_equal( - check_dist(n, fun = function(r1, r2) { + check_dist(ns[[i]], fun = function(r1, r2) { get_rank_distance(r1, r2, "kendall") }), - check_dist(n, fun = function(r1, r2) { - PerMallows::distance(r1, r2, "kendall") - }) + correct[[i]] ) } }) @@ -48,14 +54,20 @@ test_that("Kendall distance is correct", { # Loop over some n values test_that("Cayley distance is correct", { - for (n in c(2, 3, 5)) { + ns <- c(2, 3, 5) + correct <- list(c(0, 1), c(0, 1, 1, 2, 2, 1), + c(0, 1, 1, 2, 2, 1, 1, 2, 2, 3, 3, 2, 2, 3, 1, 2, 2, 3, 3, 2, + 2, 1, 3, 2, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 3, 3, 4, 2, 3, 3, + 4, 4, 3, 3, 2, 4, 3, 2, 3, 3, 4, 4, 3, 1, 2, 2, 3, 3, 2, 2, 3, + 3, 4, 4, 3, 3, 2, 4, 3, 3, 4, 3, 4, 2, 3, 3, 4, 2, 3, 1, 2, 2, + 3, 3, 4, 2, 3, 3, 4, 4, 3, 3, 4, 2, 3, 4, 3, 3, 2, 4, 3, 3, 2, + 2, 1, 3, 2, 4, 3, 3, 2, 4, 3, 3, 4, 4, 3, 3, 2)) + for (i in seq_along(ns)) { expect_equal( - check_dist(n, fun = function(r1, r2) { + check_dist(ns[[i]], fun = function(r1, r2) { get_rank_distance(r1, r2, "cayley") }), - check_dist(n, fun = function(r1, r2) { - PerMallows::distance(r1, r2, "cayley") - }) + correct[[i]] ) } }) @@ -63,14 +75,20 @@ test_that("Cayley distance is correct", { # Loop over some n values test_that("Hamming distance is correct", { - for (n in c(2, 3, 5)) { + ns <- c(2, 3, 5) + correct <- list(c(0, 2), c(0, 2, 2, 3, 3, 2), + c(0, 2, 2, 3, 3, 2, 2, 4, 3, 4, 4, 3, 3, 4, 2, 3, 4, 4, 4, 3, + 3, 2, 4, 4, 2, 4, 4, 5, 5, 4, 3, 5, 4, 5, 5, 4, 4, 5, 3, 4, 5, + 5, 5, 4, 4, 3, 5, 5, 3, 5, 4, 5, 5, 4, 2, 4, 3, 4, 4, 3, 4, 5, + 4, 5, 5, 5, 5, 4, 5, 4, 5, 5, 4, 5, 3, 4, 5, 5, 3, 4, 2, 3, 4, + 4, 4, 5, 4, 5, 5, 5, 5, 5, 5, 5, 4, 4, 5, 4, 4, 3, 5, 5, 4, 3, + 3, 2, 4, 4, 5, 4, 5, 4, 5, 5, 5, 5, 5, 5, 4, 4)) + for (i in seq_along(ns)) { expect_equal( - check_dist(n, fun = function(r1, r2) { + check_dist(ns[[i]], fun = function(r1, r2) { get_rank_distance(r1, r2, "hamming") }), - check_dist(n, fun = function(r1, r2) { - PerMallows::distance(r1, r2, "hamming") - }) + correct[[i]] ) } }) @@ -78,14 +96,20 @@ test_that("Hamming distance is correct", { # Loop over some n values test_that("Ulam distance is correct", { - for (n in c(2, 3, 5)) { + ns <- c(2, 3, 5) + correct <- list(c(0, 1), c(0, 1, 1, 1, 1, 2), + c(0, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, + 2, 2, 2, 3, 1, 2, 2, 2, 2, 3, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 3, 2, 2, 3, 3, 1, 2, 2, 2, 2, 3, 2, 3, 2, 2, 3, 3, 2, 2, + 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 3, 2, 3, 2, 2, 3, + 3, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 3, 2, 3, + 2, 2, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 4)) + for (i in seq_along(ns)) { expect_equal( - check_dist(n, fun = function(r1, r2) { + check_dist(ns[[i]], fun = function(r1, r2) { get_rank_distance(r1, r2, "ulam") }), - check_dist(n, fun = function(r1, r2) { - PerMallows::distance(r1, r2, "ulam") - }) + correct[[i]] ) } }) diff --git a/tests/testthat/test-partition_function.R b/tests/testthat/test-partition_function.R index a8760c06..9d0d012c 100644 --- a/tests/testthat/test-partition_function.R +++ b/tests/testthat/test-partition_function.R @@ -26,10 +26,6 @@ check_log_zn <- function(n, alpha, metric) { get_rank_distance, r2 = 1:n, metric = "hamming" )))) - } else if (metric == "ulam") { - log(sum(unlist(lapply(seq(0, n - 1, by = 1), function(x) { - PerMallows::count.perms(perm.length = n, dist.value = x, dist.name = "ulam") * exp(-alpha / n * x) - })))) } else { stop("Unknown metric.") } @@ -110,6 +106,10 @@ test_that("Hamming partition function is correct", { }) test_that("Ulam partition function is correct", { + # Correct values are computed from PerMallows::count.perms() + correct <- list(0, 0, 0, 0.692897211809945, 0.668459648013286, 0.474076984180107, + 1.79142615441324, 1.75861132107948, 1.47694423521443) + i <- 1 ulam_sequence <- subset( partition_function_data, metric == "ulam" & type == "cardinalities" @@ -122,8 +122,9 @@ test_that("Ulam partition function is correct", { cardinalities = ulam_sequence[[n]], metric = "ulam" ), - check_log_zn(n, alpha, "ulam") + correct[[i]] ) + i <- i + 1 } } })