From 4c793da44aed541385c6feb5a40ebe5761f868a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=98ystein=20S=C3=B8rensen?=
 <oystein_sorensen@hotmail.com>
Date: Tue, 2 Jul 2024 11:07:07 +0200
Subject: [PATCH] Ulam bug 417 (#418)

* hotfix

* updating news

* fixing Ulam issue
---
 DESCRIPTION                                  |  2 +-
 NEWS.md                                      |  9 ++++
 cran-comments.md                             |  2 +-
 src/distances.cpp                            | 36 +++++++++------
 tests/testthat/test-compute_rank_distance.R  |  7 +++
 tests/testthat/test-smc_update_correctness.R |  1 +
 work-docs/ulam.cpp                           | 46 ++++++++++++++++++++
 7 files changed, 87 insertions(+), 16 deletions(-)
 create mode 100644 work-docs/ulam.cpp

diff --git a/DESCRIPTION b/DESCRIPTION
index c4eb5a01..8e7f7017 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: BayesMallows
 Type: Package
 Title: Bayesian Preference Learning with the Mallows Rank Model
-Version: 2.2.0.9000
+Version: 2.2.1.9000
 Authors@R: c(person("Oystein", "Sorensen",
                     email = "oystein.sorensen.1985@gmail.com",
                     role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index 11fd2cd8..bef9b7b4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,12 @@
+# BayesMallows (development versions)
+
+* A bug in the Ulam distance implementation has been corrected. Thanks to 
+  Xavier Benavides for discovering.
+
+# BayesMallows 2.2.1
+
+* Skipping a unit test which failed on CRAN's M1 Mac builder.
+
 # BayesMallows 2.2.0
 
 * For initialization of latent ranks when using pairwise preference data, all
diff --git a/cran-comments.md b/cran-comments.md
index b4b92caa..86e034d0 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,6 +1,6 @@
 ## Resubmission Note
 
-This is a resubmission containing a large number of new features.
+This is a hotfix of a unit test failing on CRAN's M1 Mac builder. The test does not fail on our local M1 Mac and neither on the M1 Mac builder online, so I could not reproduce it. I have instead made sure the test is skipped on CRAN.
 
 
 ## Test Environments
diff --git a/src/distances.cpp b/src/distances.cpp
index 93dcce40..1a730758 100644
--- a/src/distances.cpp
+++ b/src/distances.cpp
@@ -113,25 +113,33 @@ double SpearmanDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
   return d(r1(inds), r2(inds));
 }
 
-// Rewritten from https://www.geeksforgeeks.org/c-program-for-longest-increasing-subsequence/
-double longest_increasing_subsequence(const vec& permutation) {
-  int n = permutation.n_elem;
-  vec lis(n, fill::ones);
-
-  for (int i = 1; i < n; i++) {
-    for (int j = 0; j < i; j++) {
-      if (permutation(i) > permutation(j) && lis(i) < lis(j) + 1) {
-        lis(i) = lis(j) + 1;
-      }
+// Rewritten from https://www.geeksforgeeks.org/longest-common-subsequence-dp-4/
+int longest_common_subsequence(
+    const arma::uvec& ordering_1,
+    const arma::uvec& ordering_2) {
+  int n = ordering_1.size();
+  int m = ordering_2.size();
+
+  arma::vec prev = arma::zeros(m + 1);
+  arma::vec cur = arma::zeros(m + 1);
+
+  for (int idx1 = 1; idx1 < n + 1; idx1++) {
+    for (int idx2 = 1; idx2 < m + 1; idx2++) {
+      if (ordering_1(idx1 - 1) == ordering_2(idx2 - 1))
+        cur(idx2) = 1 + prev(idx2 - 1);
+      else
+        cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2));
     }
+    prev = cur;
   }
-  return max(lis);
-}
 
+  return cur[m];
+}
 
 double UlamDistance::d(const vec& r1, const vec& r2) {
-  uvec x = sort_index(r2);
-  return r1.size() - longest_increasing_subsequence(r1(x));
+  uvec ordering_1 = sort_index(r1);
+  uvec ordering_2 = sort_index(r2);
+  return r1.size() - longest_common_subsequence(ordering_1, ordering_2);
 }
 
 double UlamDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
diff --git a/tests/testthat/test-compute_rank_distance.R b/tests/testthat/test-compute_rank_distance.R
index fd24b3e6..165b78ea 100644
--- a/tests/testthat/test-compute_rank_distance.R
+++ b/tests/testthat/test-compute_rank_distance.R
@@ -76,6 +76,13 @@ test_that("compute_rank_distance works", {
       observation_frequency = observation_frequency
     ), c(6, 3)
   )
+  expect_equal(
+    compute_rank_distance(
+      create_ranking(c(5, 1, 4, 3, 2)),
+      create_ranking(c(3, 1, 2, 4, 5)),
+      "ulam"
+    ), 3
+  )
 })
 
 test_that("distances are right-invariant", {
diff --git a/tests/testthat/test-smc_update_correctness.R b/tests/testthat/test-smc_update_correctness.R
index f164c5cf..68474958 100644
--- a/tests/testthat/test-smc_update_correctness.R
+++ b/tests/testthat/test-smc_update_correctness.R
@@ -223,6 +223,7 @@ test_that("update_mallows is correct for updated partial rankings", {
     data = setup_rank_data(rankings = dat2)
   )
 
+  skip_on_cran()
   expect_equal(
     mean(mod2$alpha$value),
     mean(mod_bmm$alpha$value[mod_bmm$alpha$iteration > 1000]),
diff --git a/work-docs/ulam.cpp b/work-docs/ulam.cpp
new file mode 100644
index 00000000..5a567977
--- /dev/null
+++ b/work-docs/ulam.cpp
@@ -0,0 +1,46 @@
+#include <RcppArmadillo.h>
+// [[Rcpp::depends(RcppArmadillo)]]
+
+// Dynamic Programming C++ implementation
+// of LCS problem
+
+using namespace std;
+
+int longestCommonSubsequence(const arma::vec& r1, const arma::vec& r2)
+{
+  int n = r1.size();
+  int m = r2.size();
+
+  arma::vec prev = arma::zeros(m + 1);
+  arma::vec cur = arma::zeros(m + 1);
+
+  for (int idx1 = 1; idx1 < n + 1; idx1++) {
+    for (int idx2 = 1; idx2 < m + 1; idx2++) {
+      if (r1(idx1 - 1) == r2(idx2 - 1))
+        cur(idx2) = 1 + prev(idx2 - 1);
+      else
+        cur(idx2) = 0 + std::max(cur(idx2 - 1), prev(idx2));
+    }
+    prev = cur;
+  }
+
+  return cur[m];
+}
+
+// [[Rcpp::export]]
+int test(arma::vec r1, arma::vec r2)
+{
+  return longestCommonSubsequence(r1, r2);
+}
+
+
+
+
+// You can include R code blocks in C++ files processed with sourceCpp
+// (useful for testing and development). The R code will be automatically
+// run after the compilation.
+//
+
+/*** R
+test(c(5,1,4,3,2), c(3,1,2,4,5))
+*/