From 7e8e42223f321597a0cfda8e662a7cc570e52c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Fri, 19 Apr 2024 08:33:32 +0200 Subject: [PATCH 1/9] dev version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 558c2ce8..c4eb5a01 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesMallows Type: Package Title: Bayesian Preference Learning with the Mallows Rank Model -Version: 2.2.0 +Version: 2.2.0.9000 Authors@R: c(person("Oystein", "Sorensen", email = "oystein.sorensen.1985@gmail.com", role = c("aut", "cre"), From 4c793da44aed541385c6feb5a40ebe5761f868a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Tue, 2 Jul 2024 11:07:07 +0200 Subject: [PATCH 2/9] Ulam bug 417 (#418) * hotfix * updating news * fixing Ulam issue --- DESCRIPTION | 2 +- NEWS.md | 9 ++++ cran-comments.md | 2 +- src/distances.cpp | 36 +++++++++------ tests/testthat/test-compute_rank_distance.R | 7 +++ tests/testthat/test-smc_update_correctness.R | 1 + work-docs/ulam.cpp | 46 ++++++++++++++++++++ 7 files changed, 87 insertions(+), 16 deletions(-) create mode 100644 work-docs/ulam.cpp diff --git a/DESCRIPTION b/DESCRIPTION index c4eb5a01..8e7f7017 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesMallows Type: Package Title: Bayesian Preference Learning with the Mallows Rank Model -Version: 2.2.0.9000 +Version: 2.2.1.9000 Authors@R: c(person("Oystein", "Sorensen", email = "oystein.sorensen.1985@gmail.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 11fd2cd8..bef9b7b4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# BayesMallows (development versions) + +* A bug in the Ulam distance implementation has been corrected. Thanks to + Xavier Benavides for discovering. + +# BayesMallows 2.2.1 + +* Skipping a unit test which failed on CRAN's M1 Mac builder. + # BayesMallows 2.2.0 * For initialization of latent ranks when using pairwise preference data, all diff --git a/cran-comments.md b/cran-comments.md index b4b92caa..86e034d0 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,6 +1,6 @@ ## Resubmission Note -This is a resubmission containing a large number of new features. +This is a hotfix of a unit test failing on CRAN's M1 Mac builder. The test does not fail on our local M1 Mac and neither on the M1 Mac builder online, so I could not reproduce it. I have instead made sure the test is skipped on CRAN. ## Test Environments diff --git a/src/distances.cpp b/src/distances.cpp index 93dcce40..1a730758 100644 --- a/src/distances.cpp +++ b/src/distances.cpp @@ -113,25 +113,33 @@ double SpearmanDistance::d(const vec& r1, const vec& r2, const uvec& inds) { return d(r1(inds), r2(inds)); } -// Rewritten from https://www.geeksforgeeks.org/c-program-for-longest-increasing-subsequence/ -double longest_increasing_subsequence(const vec& permutation) { - int n = permutation.n_elem; - vec lis(n, fill::ones); - - for (int i = 1; i < n; i++) { - for (int j = 0; j < i; j++) { - if (permutation(i) > permutation(j) && lis(i) < lis(j) + 1) { - lis(i) = lis(j) + 1; - } +// Rewritten from https://www.geeksforgeeks.org/longest-common-subsequence-dp-4/ +int longest_common_subsequence( + const arma::uvec& ordering_1, + const arma::uvec& ordering_2) { + int n = ordering_1.size(); + int m = ordering_2.size(); + + arma::vec prev = arma::zeros(m + 1); + arma::vec cur = arma::zeros(m + 1); + + for (int idx1 = 1; idx1 < n + 1; idx1++) { + for (int idx2 = 1; idx2 < m + 1; idx2++) { + if (ordering_1(idx1 - 1) == ordering_2(idx2 - 1)) + cur(idx2) = 1 + prev(idx2 - 1); + else + cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2)); } + prev = cur; } - return max(lis); -} + return cur[m]; +} double UlamDistance::d(const vec& r1, const vec& r2) { - uvec x = sort_index(r2); - return r1.size() - longest_increasing_subsequence(r1(x)); + uvec ordering_1 = sort_index(r1); + uvec ordering_2 = sort_index(r2); + return r1.size() - longest_common_subsequence(ordering_1, ordering_2); } double UlamDistance::d(const vec& r1, const vec& r2, const uvec& inds) { diff --git a/tests/testthat/test-compute_rank_distance.R b/tests/testthat/test-compute_rank_distance.R index fd24b3e6..165b78ea 100644 --- a/tests/testthat/test-compute_rank_distance.R +++ b/tests/testthat/test-compute_rank_distance.R @@ -76,6 +76,13 @@ test_that("compute_rank_distance works", { observation_frequency = observation_frequency ), c(6, 3) ) + expect_equal( + compute_rank_distance( + create_ranking(c(5, 1, 4, 3, 2)), + create_ranking(c(3, 1, 2, 4, 5)), + "ulam" + ), 3 + ) }) test_that("distances are right-invariant", { diff --git a/tests/testthat/test-smc_update_correctness.R b/tests/testthat/test-smc_update_correctness.R index f164c5cf..68474958 100644 --- a/tests/testthat/test-smc_update_correctness.R +++ b/tests/testthat/test-smc_update_correctness.R @@ -223,6 +223,7 @@ test_that("update_mallows is correct for updated partial rankings", { data = setup_rank_data(rankings = dat2) ) + skip_on_cran() expect_equal( mean(mod2$alpha$value), mean(mod_bmm$alpha$value[mod_bmm$alpha$iteration > 1000]), diff --git a/work-docs/ulam.cpp b/work-docs/ulam.cpp new file mode 100644 index 00000000..5a567977 --- /dev/null +++ b/work-docs/ulam.cpp @@ -0,0 +1,46 @@ +#include +// [[Rcpp::depends(RcppArmadillo)]] + +// Dynamic Programming C++ implementation +// of LCS problem + +using namespace std; + +int longestCommonSubsequence(const arma::vec& r1, const arma::vec& r2) +{ + int n = r1.size(); + int m = r2.size(); + + arma::vec prev = arma::zeros(m + 1); + arma::vec cur = arma::zeros(m + 1); + + for (int idx1 = 1; idx1 < n + 1; idx1++) { + for (int idx2 = 1; idx2 < m + 1; idx2++) { + if (r1(idx1 - 1) == r2(idx2 - 1)) + cur(idx2) = 1 + prev(idx2 - 1); + else + cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2)); + } + prev = cur; + } + + return cur[m]; +} + +// [[Rcpp::export]] +int test(arma::vec r1, arma::vec r2) +{ + return longestCommonSubsequence(r1, r2); +} + + + + +// You can include R code blocks in C++ files processed with sourceCpp +// (useful for testing and development). The R code will be automatically +// run after the compilation. +// + +/*** R +test(c(5,1,4,3,2), c(3,1,2,4,5)) +*/ From 0846a0289c479d1f2c7a51900f6bb26afbb62790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Tue, 2 Jul 2024 11:13:58 +0200 Subject: [PATCH 3/9] incrementing version before running CRAN tests --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8e7f7017..2ce9287a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesMallows Type: Package Title: Bayesian Preference Learning with the Mallows Rank Model -Version: 2.2.1.9000 +Version: 2.2.2 Authors@R: c(person("Oystein", "Sorensen", email = "oystein.sorensen.1985@gmail.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index bef9b7b4..1418db3e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# BayesMallows (development versions) +# BayesMallows 2.2.2 * A bug in the Ulam distance implementation has been corrected. Thanks to Xavier Benavides for discovering. From 4c4e02880c4b22d8b4c24a17943f35a48b7cecab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Tue, 2 Jul 2024 11:35:35 +0200 Subject: [PATCH 4/9] updating news.md to say that no bug in Ulam distance has been fixed --- NEWS.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1418db3e..ad7e93a0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,4 @@ -# BayesMallows 2.2.2 - -* A bug in the Ulam distance implementation has been corrected. Thanks to - Xavier Benavides for discovering. +# BayesMallows (development versions) # BayesMallows 2.2.1 From 35aa24c5511fdf6d2dc1562a1b6c4fbc7bb5720d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Mon, 8 Jul 2024 10:44:39 +0200 Subject: [PATCH 5/9] fixing error in likelihood calculation --- DESCRIPTION | 2 +- NEWS.md | 3 +++ R/get_mallows_loglik.R | 29 ++++++++++++----------------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2ce9287a..8e7f7017 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesMallows Type: Package Title: Bayesian Preference Learning with the Mallows Rank Model -Version: 2.2.2 +Version: 2.2.1.9000 Authors@R: c(person("Oystein", "Sorensen", email = "oystein.sorensen.1985@gmail.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index ad7e93a0..7c787eed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # BayesMallows (development versions) +* An error in compute_mallows_loglik when the number of clusters is more than + one has been corrected. Thanks to Marta Crispino. + # BayesMallows 2.2.1 * Skipping a unit test which failed on CRAN's M1 Mac builder. diff --git a/R/get_mallows_loglik.R b/R/get_mallows_loglik.R index 3bb8f562..d9eacfe0 100644 --- a/R/get_mallows_loglik.R +++ b/R/get_mallows_loglik.R @@ -58,25 +58,20 @@ get_mallows_loglik <- function( pfun_values <- prepare_partition_function(metric, n_items) - loglik <- vapply( - X = seq_len(n_clusters), - FUN = function(g) { - -(alpha[g] / n_items * sum(get_rank_distance( - rankings = t(rankings), - rho = rho[g, ], - metric = metric - ) * observation_frequency) + - N * get_partition_function( - alpha = alpha[g], n_items = n_items, metric = metric, pfun_values - )) * - weights[[g]] - }, - FUN.VALUE = numeric(1) - ) + pp <- sapply(1:n_clusters, function(g) { + weights[g] * exp(-alpha[g] / n_items * compute_rank_distance(rankings, rho[g, ], + metric = metric, + observation_frequency = observation_frequency + ) - BayesMallows:::get_partition_function(alpha = alpha[g], n_items = n_items, metric = metric, pfun_values)) + }) + + + loglik <- sum(log(apply(pp, 1, sum))) + if (!log) { - exp(sum(loglik)) + exp(loglik) } else { - sum(loglik) + loglik } } From 532146ae52701f26c96241743a297c818adf3003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Mon, 8 Jul 2024 10:47:21 +0200 Subject: [PATCH 6/9] added unit test for log likelihood with clusters --- tests/testthat/test-get_mallows_loglik.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/testthat/test-get_mallows_loglik.R b/tests/testthat/test-get_mallows_loglik.R index 83233694..3d2ad9e4 100644 --- a/tests/testthat/test-get_mallows_loglik.R +++ b/tests/testthat/test-get_mallows_loglik.R @@ -119,3 +119,21 @@ test_that("get_mallows_loglik works", { "Partition function not available." ) }) + +test_that("get_mallows_loglik is correct with clusters", { + rankings <- R <- potato_visual + n_items <- ncol(R) + N <- nrow(R) + + rho <- rbind(potato_true_ranking,1:20) + alpha <- c(2.5,1) + weights <- c(0.2,0.8) + + expect_equal( + get_mallows_loglik(rho[1,],alpha[1],1,rankings = R,metric='spearman'), + -279.763590378285) + + expect_equal( + get_mallows_loglik(rho,alpha,weights,rankings = R,metric='spearman'), + -299.076845327494) +}) From 28e5c940486e5c603b5f3dc4839ee8315467538d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Mon, 8 Jul 2024 10:49:33 +0200 Subject: [PATCH 7/9] fixed namespace thing --- R/get_mallows_loglik.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_mallows_loglik.R b/R/get_mallows_loglik.R index d9eacfe0..1590c528 100644 --- a/R/get_mallows_loglik.R +++ b/R/get_mallows_loglik.R @@ -62,7 +62,7 @@ get_mallows_loglik <- function( weights[g] * exp(-alpha[g] / n_items * compute_rank_distance(rankings, rho[g, ], metric = metric, observation_frequency = observation_frequency - ) - BayesMallows:::get_partition_function(alpha = alpha[g], n_items = n_items, metric = metric, pfun_values)) + ) - get_partition_function(alpha = alpha[g], n_items = n_items, metric = metric, pfun_values)) }) From d386bfdfcb65e56678476eb042f3b244798171c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Mon, 8 Jul 2024 10:57:49 +0200 Subject: [PATCH 8/9] switching to vapply for safety --- R/get_mallows_loglik.R | 12 ++++++------ tests/testthat/test-get_mallows_loglik.R | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/R/get_mallows_loglik.R b/R/get_mallows_loglik.R index 1590c528..c41104d8 100644 --- a/R/get_mallows_loglik.R +++ b/R/get_mallows_loglik.R @@ -58,17 +58,17 @@ get_mallows_loglik <- function( pfun_values <- prepare_partition_function(metric, n_items) - pp <- sapply(1:n_clusters, function(g) { + pp <- vapply(seq_len(n_clusters), function(g) { weights[g] * exp(-alpha[g] / n_items * compute_rank_distance(rankings, rho[g, ], - metric = metric, - observation_frequency = observation_frequency + metric = metric, + observation_frequency = observation_frequency ) - get_partition_function(alpha = alpha[g], n_items = n_items, metric = metric, pfun_values)) - }) - + }, + FUN.VALUE = numeric(nrow(rankings)) + ) loglik <- sum(log(apply(pp, 1, sum))) - if (!log) { exp(loglik) } else { diff --git a/tests/testthat/test-get_mallows_loglik.R b/tests/testthat/test-get_mallows_loglik.R index 3d2ad9e4..5d45488b 100644 --- a/tests/testthat/test-get_mallows_loglik.R +++ b/tests/testthat/test-get_mallows_loglik.R @@ -130,10 +130,12 @@ test_that("get_mallows_loglik is correct with clusters", { weights <- c(0.2,0.8) expect_equal( - get_mallows_loglik(rho[1,],alpha[1],1,rankings = R,metric='spearman'), + get_mallows_loglik(rho = rho[1,], alpha = alpha[1], weights = 1, + rankings = R, metric = 'spearman'), -279.763590378285) expect_equal( - get_mallows_loglik(rho,alpha,weights,rankings = R,metric='spearman'), + get_mallows_loglik(rho = rho, alpha = alpha, weights = weights, + rankings = R, metric = 'spearman'), -299.076845327494) }) From 16af266c9ca5cb5c41a5ebe273a58a6f8a83f6c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?= Date: Mon, 8 Jul 2024 10:58:02 +0200 Subject: [PATCH 9/9] fixing some more wrong test expectations --- tests/testthat/test-get_mallows_loglik.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-get_mallows_loglik.R b/tests/testthat/test-get_mallows_loglik.R index 5d45488b..31257705 100644 --- a/tests/testthat/test-get_mallows_loglik.R +++ b/tests/testthat/test-get_mallows_loglik.R @@ -83,7 +83,7 @@ test_that("get_mallows_loglik works", { rankings = freq_distr[, 1:n_items], observation_frequency = freq_distr[, n_items + 1], log = FALSE - )), "1.434e-74" + )), "3.719e-53" ) expect_equal(round(get_mallows_loglik( @@ -94,7 +94,7 @@ test_that("get_mallows_loglik works", { rankings = freq_distr[, 1:n_items], observation_frequency = freq_distr[, n_items + 1], log = TRUE - ), 4), -170.0306) + ), 4), -120.7237) expect_error( get_mallows_loglik(