diff --git a/DESCRIPTION b/DESCRIPTION index 55542842..8e7f7017 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesMallows Type: Package Title: Bayesian Preference Learning with the Mallows Rank Model -Version: 2.2.1 +Version: 2.2.1.9000 Authors@R: c(person("Oystein", "Sorensen", email = "oystein.sorensen.1985@gmail.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 65244960..7c787eed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# BayesMallows (development versions) + +* An error in compute_mallows_loglik when the number of clusters is more than + one has been corrected. Thanks to Marta Crispino. + # BayesMallows 2.2.1 * Skipping a unit test which failed on CRAN's M1 Mac builder. diff --git a/R/get_mallows_loglik.R b/R/get_mallows_loglik.R index 3bb8f562..c41104d8 100644 --- a/R/get_mallows_loglik.R +++ b/R/get_mallows_loglik.R @@ -58,25 +58,20 @@ get_mallows_loglik <- function( pfun_values <- prepare_partition_function(metric, n_items) - loglik <- vapply( - X = seq_len(n_clusters), - FUN = function(g) { - -(alpha[g] / n_items * sum(get_rank_distance( - rankings = t(rankings), - rho = rho[g, ], - metric = metric - ) * observation_frequency) + - N * get_partition_function( - alpha = alpha[g], n_items = n_items, metric = metric, pfun_values - )) * - weights[[g]] - }, - FUN.VALUE = numeric(1) + pp <- vapply(seq_len(n_clusters), function(g) { + weights[g] * exp(-alpha[g] / n_items * compute_rank_distance(rankings, rho[g, ], + metric = metric, + observation_frequency = observation_frequency + ) - get_partition_function(alpha = alpha[g], n_items = n_items, metric = metric, pfun_values)) + }, + FUN.VALUE = numeric(nrow(rankings)) ) + loglik <- sum(log(apply(pp, 1, sum))) + if (!log) { - exp(sum(loglik)) + exp(loglik) } else { - sum(loglik) + loglik } } diff --git a/src/distances.cpp b/src/distances.cpp index 93dcce40..1a730758 100644 --- a/src/distances.cpp +++ b/src/distances.cpp @@ -113,25 +113,33 @@ double SpearmanDistance::d(const vec& r1, const vec& r2, const uvec& inds) { return d(r1(inds), r2(inds)); } -// Rewritten from https://www.geeksforgeeks.org/c-program-for-longest-increasing-subsequence/ -double longest_increasing_subsequence(const vec& permutation) { - int n = permutation.n_elem; - vec lis(n, fill::ones); - - for (int i = 1; i < n; i++) { - for (int j = 0; j < i; j++) { - if (permutation(i) > permutation(j) && lis(i) < lis(j) + 1) { - lis(i) = lis(j) + 1; - } +// Rewritten from https://www.geeksforgeeks.org/longest-common-subsequence-dp-4/ +int longest_common_subsequence( + const arma::uvec& ordering_1, + const arma::uvec& ordering_2) { + int n = ordering_1.size(); + int m = ordering_2.size(); + + arma::vec prev = arma::zeros(m + 1); + arma::vec cur = arma::zeros(m + 1); + + for (int idx1 = 1; idx1 < n + 1; idx1++) { + for (int idx2 = 1; idx2 < m + 1; idx2++) { + if (ordering_1(idx1 - 1) == ordering_2(idx2 - 1)) + cur(idx2) = 1 + prev(idx2 - 1); + else + cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2)); } + prev = cur; } - return max(lis); -} + return cur[m]; +} double UlamDistance::d(const vec& r1, const vec& r2) { - uvec x = sort_index(r2); - return r1.size() - longest_increasing_subsequence(r1(x)); + uvec ordering_1 = sort_index(r1); + uvec ordering_2 = sort_index(r2); + return r1.size() - longest_common_subsequence(ordering_1, ordering_2); } double UlamDistance::d(const vec& r1, const vec& r2, const uvec& inds) { diff --git a/tests/testthat/test-compute_rank_distance.R b/tests/testthat/test-compute_rank_distance.R index fd24b3e6..165b78ea 100644 --- a/tests/testthat/test-compute_rank_distance.R +++ b/tests/testthat/test-compute_rank_distance.R @@ -76,6 +76,13 @@ test_that("compute_rank_distance works", { observation_frequency = observation_frequency ), c(6, 3) ) + expect_equal( + compute_rank_distance( + create_ranking(c(5, 1, 4, 3, 2)), + create_ranking(c(3, 1, 2, 4, 5)), + "ulam" + ), 3 + ) }) test_that("distances are right-invariant", { diff --git a/tests/testthat/test-get_mallows_loglik.R b/tests/testthat/test-get_mallows_loglik.R index 83233694..31257705 100644 --- a/tests/testthat/test-get_mallows_loglik.R +++ b/tests/testthat/test-get_mallows_loglik.R @@ -83,7 +83,7 @@ test_that("get_mallows_loglik works", { rankings = freq_distr[, 1:n_items], observation_frequency = freq_distr[, n_items + 1], log = FALSE - )), "1.434e-74" + )), "3.719e-53" ) expect_equal(round(get_mallows_loglik( @@ -94,7 +94,7 @@ test_that("get_mallows_loglik works", { rankings = freq_distr[, 1:n_items], observation_frequency = freq_distr[, n_items + 1], log = TRUE - ), 4), -170.0306) + ), 4), -120.7237) expect_error( get_mallows_loglik( @@ -119,3 +119,23 @@ test_that("get_mallows_loglik works", { "Partition function not available." ) }) + +test_that("get_mallows_loglik is correct with clusters", { + rankings <- R <- potato_visual + n_items <- ncol(R) + N <- nrow(R) + + rho <- rbind(potato_true_ranking,1:20) + alpha <- c(2.5,1) + weights <- c(0.2,0.8) + + expect_equal( + get_mallows_loglik(rho = rho[1,], alpha = alpha[1], weights = 1, + rankings = R, metric = 'spearman'), + -279.763590378285) + + expect_equal( + get_mallows_loglik(rho = rho, alpha = alpha, weights = weights, + rankings = R, metric = 'spearman'), + -299.076845327494) +}) diff --git a/work-docs/ulam.cpp b/work-docs/ulam.cpp new file mode 100644 index 00000000..5a567977 --- /dev/null +++ b/work-docs/ulam.cpp @@ -0,0 +1,46 @@ +#include +// [[Rcpp::depends(RcppArmadillo)]] + +// Dynamic Programming C++ implementation +// of LCS problem + +using namespace std; + +int longestCommonSubsequence(const arma::vec& r1, const arma::vec& r2) +{ + int n = r1.size(); + int m = r2.size(); + + arma::vec prev = arma::zeros(m + 1); + arma::vec cur = arma::zeros(m + 1); + + for (int idx1 = 1; idx1 < n + 1; idx1++) { + for (int idx2 = 1; idx2 < m + 1; idx2++) { + if (r1(idx1 - 1) == r2(idx2 - 1)) + cur(idx2) = 1 + prev(idx2 - 1); + else + cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2)); + } + prev = cur; + } + + return cur[m]; +} + +// [[Rcpp::export]] +int test(arma::vec r1, arma::vec r2) +{ + return longestCommonSubsequence(r1, r2); +} + + + + +// You can include R code blocks in C++ files processed with sourceCpp +// (useful for testing and development). The R code will be automatically +// run after the compilation. +// + +/*** R +test(c(5,1,4,3,2), c(3,1,2,4,5)) +*/