Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed bug in likelihood calculation with clusters #419

Merged
merged 10 commits into from
Jul 8, 2024
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: BayesMallows
Type: Package
Title: Bayesian Preference Learning with the Mallows Rank Model
Version: 2.2.1
Version: 2.2.1.9000
Authors@R: c(person("Oystein", "Sorensen",
email = "oystein.sorensen.1985@gmail.com",
role = c("aut", "cre"),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# BayesMallows (development versions)

* An error in compute_mallows_loglik when the number of clusters is more than
one has been corrected. Thanks to Marta Crispino.

# BayesMallows 2.2.1

* Skipping a unit test which failed on CRAN's M1 Mac builder.
Expand Down
27 changes: 11 additions & 16 deletions R/get_mallows_loglik.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,25 +58,20 @@ get_mallows_loglik <- function(

pfun_values <- prepare_partition_function(metric, n_items)

loglik <- vapply(
X = seq_len(n_clusters),
FUN = function(g) {
-(alpha[g] / n_items * sum(get_rank_distance(
rankings = t(rankings),
rho = rho[g, ],
metric = metric
) * observation_frequency) +
N * get_partition_function(
alpha = alpha[g], n_items = n_items, metric = metric, pfun_values
)) *
weights[[g]]
},
FUN.VALUE = numeric(1)
pp <- vapply(seq_len(n_clusters), function(g) {
weights[g] * exp(-alpha[g] / n_items * compute_rank_distance(rankings, rho[g, ],
metric = metric,
observation_frequency = observation_frequency
) - get_partition_function(alpha = alpha[g], n_items = n_items, metric = metric, pfun_values))
},
FUN.VALUE = numeric(nrow(rankings))
)

loglik <- sum(log(apply(pp, 1, sum)))

if (!log) {
exp(sum(loglik))
exp(loglik)
} else {
sum(loglik)
loglik
}
}
36 changes: 22 additions & 14 deletions src/distances.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,25 +113,33 @@ double SpearmanDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
return d(r1(inds), r2(inds));
}

// Rewritten from https://www.geeksforgeeks.org/c-program-for-longest-increasing-subsequence/
double longest_increasing_subsequence(const vec& permutation) {
int n = permutation.n_elem;
vec lis(n, fill::ones);

for (int i = 1; i < n; i++) {
for (int j = 0; j < i; j++) {
if (permutation(i) > permutation(j) && lis(i) < lis(j) + 1) {
lis(i) = lis(j) + 1;
}
// Rewritten from https://www.geeksforgeeks.org/longest-common-subsequence-dp-4/
int longest_common_subsequence(
const arma::uvec& ordering_1,
const arma::uvec& ordering_2) {
int n = ordering_1.size();
int m = ordering_2.size();

arma::vec prev = arma::zeros(m + 1);
arma::vec cur = arma::zeros(m + 1);

for (int idx1 = 1; idx1 < n + 1; idx1++) {
for (int idx2 = 1; idx2 < m + 1; idx2++) {
if (ordering_1(idx1 - 1) == ordering_2(idx2 - 1))
cur(idx2) = 1 + prev(idx2 - 1);
else
cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2));
}
prev = cur;
}
return max(lis);
}

return cur[m];
}

double UlamDistance::d(const vec& r1, const vec& r2) {
uvec x = sort_index(r2);
return r1.size() - longest_increasing_subsequence(r1(x));
uvec ordering_1 = sort_index(r1);
uvec ordering_2 = sort_index(r2);
return r1.size() - longest_common_subsequence(ordering_1, ordering_2);
}

double UlamDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
Expand Down
7 changes: 7 additions & 0 deletions tests/testthat/test-compute_rank_distance.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ test_that("compute_rank_distance works", {
observation_frequency = observation_frequency
), c(6, 3)
)
expect_equal(
compute_rank_distance(
create_ranking(c(5, 1, 4, 3, 2)),
create_ranking(c(3, 1, 2, 4, 5)),
"ulam"
), 3
)
})

test_that("distances are right-invariant", {
Expand Down
24 changes: 22 additions & 2 deletions tests/testthat/test-get_mallows_loglik.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ test_that("get_mallows_loglik works", {
rankings = freq_distr[, 1:n_items],
observation_frequency = freq_distr[, n_items + 1],
log = FALSE
)), "1.434e-74"
)), "3.719e-53"
)

expect_equal(round(get_mallows_loglik(
Expand All @@ -94,7 +94,7 @@ test_that("get_mallows_loglik works", {
rankings = freq_distr[, 1:n_items],
observation_frequency = freq_distr[, n_items + 1],
log = TRUE
), 4), -170.0306)
), 4), -120.7237)

expect_error(
get_mallows_loglik(
Expand All @@ -119,3 +119,23 @@ test_that("get_mallows_loglik works", {
"Partition function not available."
)
})

test_that("get_mallows_loglik is correct with clusters", {
rankings <- R <- potato_visual
n_items <- ncol(R)
N <- nrow(R)

rho <- rbind(potato_true_ranking,1:20)
alpha <- c(2.5,1)
weights <- c(0.2,0.8)

expect_equal(
get_mallows_loglik(rho = rho[1,], alpha = alpha[1], weights = 1,
rankings = R, metric = 'spearman'),
-279.763590378285)

expect_equal(
get_mallows_loglik(rho = rho, alpha = alpha, weights = weights,
rankings = R, metric = 'spearman'),
-299.076845327494)
})
46 changes: 46 additions & 0 deletions work-docs/ulam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]

// Dynamic Programming C++ implementation
// of LCS problem

using namespace std;

int longestCommonSubsequence(const arma::vec& r1, const arma::vec& r2)
{
int n = r1.size();
int m = r2.size();

arma::vec prev = arma::zeros(m + 1);
arma::vec cur = arma::zeros(m + 1);

for (int idx1 = 1; idx1 < n + 1; idx1++) {
for (int idx2 = 1; idx2 < m + 1; idx2++) {
if (r1(idx1 - 1) == r2(idx2 - 1))
cur(idx2) = 1 + prev(idx2 - 1);
else
cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2));
}
prev = cur;
}

return cur[m];
}

// [[Rcpp::export]]
int test(arma::vec r1, arma::vec r2)
{
return longestCommonSubsequence(r1, r2);
}




// You can include R code blocks in C++ files processed with sourceCpp
// (useful for testing and development). The R code will be automatically
// run after the compilation.
//

/*** R
test(c(5,1,4,3,2), c(3,1,2,4,5))
*/
Loading