Skip to content

Commit

Permalink
Ulam bug 417 (#418)
Browse files Browse the repository at this point in the history
* hotfix

* updating news

* fixing Ulam issue
  • Loading branch information
osorensen authored Jul 2, 2024
1 parent 7e8e422 commit 4c793da
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 16 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: BayesMallows
Type: Package
Title: Bayesian Preference Learning with the Mallows Rank Model
Version: 2.2.0.9000
Version: 2.2.1.9000
Authors@R: c(person("Oystein", "Sorensen",
email = "oystein.sorensen.1985@gmail.com",
role = c("aut", "cre"),
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# BayesMallows (development versions)

* A bug in the Ulam distance implementation has been corrected. Thanks to
Xavier Benavides for discovering.

# BayesMallows 2.2.1

* Skipping a unit test which failed on CRAN's M1 Mac builder.

# BayesMallows 2.2.0

* For initialization of latent ranks when using pairwise preference data, all
Expand Down
2 changes: 1 addition & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Resubmission Note

This is a resubmission containing a large number of new features.
This is a hotfix of a unit test failing on CRAN's M1 Mac builder. The test does not fail on our local M1 Mac and neither on the M1 Mac builder online, so I could not reproduce it. I have instead made sure the test is skipped on CRAN.


## Test Environments
Expand Down
36 changes: 22 additions & 14 deletions src/distances.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,25 +113,33 @@ double SpearmanDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
return d(r1(inds), r2(inds));
}

// Rewritten from https://www.geeksforgeeks.org/c-program-for-longest-increasing-subsequence/
double longest_increasing_subsequence(const vec& permutation) {
int n = permutation.n_elem;
vec lis(n, fill::ones);

for (int i = 1; i < n; i++) {
for (int j = 0; j < i; j++) {
if (permutation(i) > permutation(j) && lis(i) < lis(j) + 1) {
lis(i) = lis(j) + 1;
}
// Rewritten from https://www.geeksforgeeks.org/longest-common-subsequence-dp-4/
int longest_common_subsequence(
const arma::uvec& ordering_1,
const arma::uvec& ordering_2) {
int n = ordering_1.size();
int m = ordering_2.size();

arma::vec prev = arma::zeros(m + 1);
arma::vec cur = arma::zeros(m + 1);

for (int idx1 = 1; idx1 < n + 1; idx1++) {
for (int idx2 = 1; idx2 < m + 1; idx2++) {
if (ordering_1(idx1 - 1) == ordering_2(idx2 - 1))
cur(idx2) = 1 + prev(idx2 - 1);
else
cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2));
}
prev = cur;
}
return max(lis);
}

return cur[m];
}

double UlamDistance::d(const vec& r1, const vec& r2) {
uvec x = sort_index(r2);
return r1.size() - longest_increasing_subsequence(r1(x));
uvec ordering_1 = sort_index(r1);
uvec ordering_2 = sort_index(r2);
return r1.size() - longest_common_subsequence(ordering_1, ordering_2);
}

double UlamDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
Expand Down
7 changes: 7 additions & 0 deletions tests/testthat/test-compute_rank_distance.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ test_that("compute_rank_distance works", {
observation_frequency = observation_frequency
), c(6, 3)
)
expect_equal(
compute_rank_distance(
create_ranking(c(5, 1, 4, 3, 2)),
create_ranking(c(3, 1, 2, 4, 5)),
"ulam"
), 3
)
})

test_that("distances are right-invariant", {
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/test-smc_update_correctness.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ test_that("update_mallows is correct for updated partial rankings", {
data = setup_rank_data(rankings = dat2)
)

skip_on_cran()
expect_equal(
mean(mod2$alpha$value),
mean(mod_bmm$alpha$value[mod_bmm$alpha$iteration > 1000]),
Expand Down
46 changes: 46 additions & 0 deletions work-docs/ulam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]

// Dynamic Programming C++ implementation
// of LCS problem

using namespace std;

int longestCommonSubsequence(const arma::vec& r1, const arma::vec& r2)
{
int n = r1.size();
int m = r2.size();

arma::vec prev = arma::zeros(m + 1);
arma::vec cur = arma::zeros(m + 1);

for (int idx1 = 1; idx1 < n + 1; idx1++) {
for (int idx2 = 1; idx2 < m + 1; idx2++) {
if (r1(idx1 - 1) == r2(idx2 - 1))
cur(idx2) = 1 + prev(idx2 - 1);
else
cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2));
}
prev = cur;
}

return cur[m];
}

// [[Rcpp::export]]
int test(arma::vec r1, arma::vec r2)
{
return longestCommonSubsequence(r1, r2);
}




// You can include R code blocks in C++ files processed with sourceCpp
// (useful for testing and development). The R code will be automatically
// run after the compilation.
//

/*** R
test(c(5,1,4,3,2), c(3,1,2,4,5))
*/

0 comments on commit 4c793da

Please sign in to comment.