From 5d713158aeb206ce6effce07976ba5ed881b046e Mon Sep 17 00:00:00 2001
From: Max Kuhn <mxkuhn@gmail.com>
Date: Mon, 25 Nov 2024 13:07:40 -0500
Subject: [PATCH] Changes mandated by CRAN (#1374)

* subselect no longer on CRAN

* re-document

* GHA update

* GHA updates to make test coverage work

* doc update

* add missing argument
---
 .Rbuildignore                        |  1 +
 .github/.gitignore                   |  1 +
 .github/workflows/R-CMD-check.yaml   | 21 +++++----
 .github/workflows/pr-commands.yaml   | 12 +++--
 .github/workflows/test-coverage.yaml | 29 ++++++++----
 README.md                            |  4 +-
 pkg/caret/DESCRIPTION                |  3 +-
 pkg/caret/R/calibration.R            |  4 +-
 pkg/caret/R/findCorrelation.R        | 70 +++++++++++++---------------
 pkg/caret/R/findLinearCombos.R       | 23 ++++-----
 pkg/caret/inst/NEWS.Rd               |  2 +-
 pkg/caret/man/calibration.Rd         |  4 +-
 pkg/caret/man/findCorrelation.Rd     | 10 +---
 pkg/caret/man/findLinearCombos.Rd    |  3 --
 14 files changed, 96 insertions(+), 91 deletions(-)
 create mode 100644 .github/.gitignore

diff --git a/.Rbuildignore b/.Rbuildignore
index c503c4f64..763b34098 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1 +1,2 @@
 ^\.github$
+^codecov\.yml$
diff --git a/.github/.gitignore b/.github/.gitignore
new file mode 100644
index 000000000..2d19fc766
--- /dev/null
+++ b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 91509fddf..59a28fb9f 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -8,9 +8,10 @@ on:
   push:
     branches: [main, master]
   pull_request:
-    branches: [main, master]
 
-name: R-CMD-check
+name: R-CMD-check.yaml
+
+permissions: read-all
 
 jobs:
   R-CMD-check:
@@ -24,20 +25,20 @@ jobs:
         config:
           - {os: macos-latest,   r: 'release'}
           - {os: windows-latest, r: 'release'}
-          # Use 3.6 to trigger usage of RTools35
-          - {os: windows-latest, r: '3.6'}
-          # use 4.1 to check with rtools40's older compiler
-          - {os: windows-latest, r: '4.1'}
-          - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
-          - {os: ubuntu-latest,   r: 'release'}
+          - {os: ubuntu-latest,  r: 'release'}
 
+          # use 4.0 or 4.1 to check with rtools40's older compiler
+          - {os: windows-latest, r: 'oldrel-4'}
+          - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
+
+          - {os: ubuntu-latest,  r: 'oldrel-1'}
 
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -46,6 +47,7 @@ jobs:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
+          working-directory: pkg/caret
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
@@ -56,4 +58,5 @@ jobs:
       - uses: r-lib/actions/check-r-package@v2
         with:
           upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
           working-directory: pkg/caret
diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml
index 49ca6c6f5..b6ba22a17 100644
--- a/.github/workflows/pr-commands.yaml
+++ b/.github/workflows/pr-commands.yaml
@@ -4,7 +4,9 @@ on:
   issue_comment:
     types: [created]
 
-name: Commands
+name: pr-commands.yaml
+
+permissions: read-all
 
 jobs:
   document:
@@ -13,8 +15,10 @@ jobs:
     runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/pr-fetch@v2
         with:
@@ -51,8 +55,10 @@ jobs:
     runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/pr-fetch@v2
         with:
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 45959c6b1..bf3ceb7c0 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -4,9 +4,10 @@ on:
   push:
     branches: [main, master]
   pull_request:
-    branches: [main, master]
 
-name: test-coverage
+name: test-coverage.yaml
+
+permissions: read-all
 
 jobs:
   test-coverage:
@@ -15,38 +16,50 @@ jobs:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
+          working-directory: pkg/caret
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::covr
+          extra-packages: any::covr, any::xml2
           needs: coverage
           working-directory: pkg/caret
 
       - name: Test coverage
         run: |
-          covr::codecov(
+          cov <- covr::package_coverage(
             quiet = FALSE,
             clean = FALSE,
             path = "pkg/caret",
-            install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
+            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
           )
+          covr::to_cobertura(cov)
         shell: Rscript {0}
 
+      - uses: codecov/codecov-action@v4
+        with:
+          # Fail CI on upload errors when not on a PR, or when on a PR and a token is available
+          fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
+          file: ./cobertura.xml
+          plugin: noop
+          disable_search: true
+          token: ${{ secrets.CODECOV_TOKEN }}
+          working-directory: pkg/caret
+
       - name: Show testthat output
         if: always()
         run: |
           ## --------------------------------------------------------------------
-          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
         shell: bash
 
       - name: Upload test results
         if: failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: coverage-test-failures
           path: ${{ runner.temp }}/package
diff --git a/README.md b/README.md
index e0b186937..7c4cd074f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-[![R-CMD-check](https://github.com/topepo/caret/workflows/R-CMD-check/badge.svg)](https://github.com/topepo/caret/actions)
-[![Coverage Status](https://coveralls.io/repos/topepo/caret/badge.svg?branch=master)](https://coveralls.io/r/topepo/caret?branch=master)
+[![R-CMD-check](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml)
+[![Codecov test coverage](https://codecov.io/gh/topepo/caret/graph/badge.svg)](https://app.codecov.io/gh/topepo/caret)
 [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/caret)](http://cran.r-project.org/web/packages/caret)
 [![Downloads](http://cranlogs.r-pkg.org/badges/caret)](http://cran.rstudio.com/package=caret)
   
diff --git a/pkg/caret/DESCRIPTION b/pkg/caret/DESCRIPTION
index 373b2e158..62fc9d86d 100644
--- a/pkg/caret/DESCRIPTION
+++ b/pkg/caret/DESCRIPTION
@@ -107,11 +107,10 @@ Suggests:
     rmarkdown,
     rpart,
     spls,
-    subselect,
     superpc,
     testthat (>= 0.9.1),
     themis (>= 0.1.3)
 VignetteBuilder: 
     knitr
 Encoding: UTF-8
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
diff --git a/pkg/caret/R/calibration.R b/pkg/caret/R/calibration.R
index ef58de348..3a37d1bd5 100644
--- a/pkg/caret/R/calibration.R
+++ b/pkg/caret/R/calibration.R
@@ -16,8 +16,8 @@
 #' \code{envir} argument in \code{eval}, e.g., a list or an environment) containing values for any
 #' variables in the formula, as well as \code{groups} and \code{subset} if applicable. If not found in
 #' \code{data}, or if \code{data} is unspecified, the variables are looked for in the environment of the
-#' formula. This argument is not used for \code{xyplot.calibration}. For {ggplot.calibration}, \code{data}
-#' should be an object of class "\code{calibration}"."
+#' formula. This argument is not used for \code{xyplot.calibration}. For \code{ggplot.calibration}, \code{data}
+#' should be an object of class "\code{calibration}".
 #'
 #' @param class a character string for the class of interest
 #'
diff --git a/pkg/caret/R/findCorrelation.R b/pkg/caret/R/findCorrelation.R
index 981c43e4e..f6b98b9ed 100644
--- a/pkg/caret/R/findCorrelation.R
+++ b/pkg/caret/R/findCorrelation.R
@@ -6,13 +6,13 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
   averageCorr <- as.numeric(as.factor(averageCorr))
   x[lower.tri(x, diag = TRUE)] <- NA
   combsAboveCutoff <- which(abs(x) > cutoff)
-  
+
   colsToCheck <- ceiling(combsAboveCutoff / nrow(x))
   rowsToCheck <- combsAboveCutoff %% nrow(x)
-  
+
   colsToDiscard <- averageCorr[colsToCheck] > averageCorr[rowsToCheck]
   rowsToDiscard <- !colsToDiscard
-  
+
   if(verbose){
     colsFlagged <- pmin(ifelse(colsToDiscard, colsToCheck, NA),
                         ifelse(rowsToDiscard, rowsToCheck, NA), na.rm = TRUE)
@@ -22,7 +22,7 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
                    '\n \t Flagging column', colsFlagged, '\n'
     ))
   }
-  
+
   deletecol <- c(colsToCheck[colsToDiscard], rowsToCheck[rowsToDiscard])
   deletecol <- unique(deletecol)
   deletecol
@@ -31,29 +31,29 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
 findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
 {
   varnum <- dim(x)[1]
-  
+
   if (!isTRUE(all.equal(x, t(x)))) stop("correlation matrix is not symmetric")
   if (varnum == 1) stop("only one variable given")
-  
+
   x <- abs(x)
-  
+
   # re-ordered columns based on max absolute correlation
   originalOrder <- 1:varnum
-  
+
   averageCorr <- function(x) mean(x, na.rm = TRUE)
   tmp <- x
   diag(tmp) <- NA
-  
+
   maxAbsCorOrder <- order(apply(tmp, 2, averageCorr), decreasing = TRUE)
   x <- x[maxAbsCorOrder, maxAbsCorOrder]
   newOrder <- originalOrder[maxAbsCorOrder]
   rm(tmp)
-  
+
   deletecol <- rep(FALSE, varnum)
-  
+
   x2 <- x
   diag(x2) <- NA
-  
+
   for (i in 1:(varnum - 1)) {
     if(!any(x2[!is.na(x2)] > cutoff)){
       if (verbose) cat("All correlations <=", cutoff, "\n")
@@ -62,13 +62,13 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
     if (deletecol[i]) next
     for (j in (i + 1):varnum) {
       if (!deletecol[i] & !deletecol[j]) {
-        
+
         if (x[i, j] > cutoff) {
           mn1 <- mean(x2[i,], na.rm = TRUE)
           mn2 <- mean(x2[-j,], na.rm = TRUE)
-          if(verbose) cat("Compare row", newOrder[i], 
-                          " and column ", newOrder[j], 
-                          "with corr ", round(x[i,j], 3), "\n")  
+          if(verbose) cat("Compare row", newOrder[i],
+                          " and column ", newOrder[j],
+                          "with corr ", round(x[i,j], 3), "\n")
           if (verbose) cat("  Means: ", round(mn1, 3), "vs", round(mn2, 3))
           if (mn1 > mn2) {
             deletecol[i] <- TRUE
@@ -92,28 +92,22 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
 
 
 #' Determine highly correlated variables
-#' 
+#'
 #' This function searches through a correlation matrix and returns a vector of
 #' integers corresponding to columns to remove to reduce pair-wise
 #' correlations.
-#' 
+#'
 #' The absolute values of pair-wise correlations are considered. If two
 #' variables have a high correlation, the function looks at the mean absolute
 #' correlation of each variable and removes the variable with the largest mean
 #' absolute correlation.
-#' 
+#'
 #' Using \code{exact = TRUE} will cause the function to re-evaluate the average
 #' correlations at each step while \code{exact = FALSE} uses all the
 #' correlations regardless of whether they have been eliminated or not. The
 #' exact calculations will remove a smaller number of predictors but can be
 #' much slower when the problem dimensions are "big".
-#' 
-#' There are several function in the \pkg{subselect} package
-#' (\code{\link[subselect:eleaps]{leaps}},
-#' \code{\link[subselect:genetic]{genetic}},
-#' \code{\link[subselect:anneal]{anneal}}) that can also be used to accomplish
-#' the same goal but tend to retain more predictors.
-#' 
+#'
 #' @param x A correlation matrix
 #' @param cutoff A numeric value for the pair-wise absolute correlation cutoff
 #' @param verbose A boolean for printing the details
@@ -130,38 +124,38 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
 #' \code{\link[subselect:anneal]{anneal}}, \code{\link{findLinearCombos}}
 #' @keywords manip
 #' @examples
-#' 
-#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32, 
+#'
+#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32,
 #'                   0.56, 0.01, 1, 0.65, 0.91, 0.32, 0.74, 0.65, 1, 0.36,
-#'                   0.85, 0.32, 0.91, 0.36, 1), 
+#'                   0.85, 0.32, 0.91, 0.36, 1),
 #'                 .Dim = c(5L, 5L))
 #' colnames(R1) <- rownames(R1) <- paste0("x", 1:ncol(R1))
 #' R1
-#' 
+#'
 #' findCorrelation(R1, cutoff = .6, exact = FALSE)
 #' findCorrelation(R1, cutoff = .6, exact = TRUE)
 #' findCorrelation(R1, cutoff = .6, exact = TRUE, names = FALSE)
-#' 
-#' 
+#'
+#'
 #' R2 <- diag(rep(1, 5))
 #' R2[2, 3] <- R2[3, 2] <- .7
 #' R2[5, 3] <- R2[3, 5] <- -.7
 #' R2[4, 1] <- R2[1, 4] <- -.67
-#' 
+#'
 #' corrDF <- expand.grid(row = 1:5, col = 1:5)
 #' corrDF$correlation <- as.vector(R2)
 #' levelplot(correlation ~ row + col, corrDF)
-#' 
+#'
 #' findCorrelation(R2, cutoff = .65, verbose = TRUE)
-#' 
+#'
 #' findCorrelation(R2, cutoff = .99, verbose = TRUE)
-#' 
+#'
 #' @export findCorrelation
 findCorrelation <- function(x, cutoff = 0.90, verbose = FALSE, names = FALSE, exact = ncol(x) < 100) {
   if(names & is.null(colnames(x)))
     stop("'x' must have column names when `names = TRUE`")
-  out <- if(exact) 
-    findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else 
+  out <- if(exact)
+    findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else
       findCorrelation_fast(x = x, cutoff = cutoff, verbose = verbose)
   out
   if(names) out <- colnames(x)[out]
diff --git a/pkg/caret/R/findLinearCombos.R b/pkg/caret/R/findLinearCombos.R
index ad9b14516..fa3c9b000 100644
--- a/pkg/caret/R/findLinearCombos.R
+++ b/pkg/caret/R/findLinearCombos.R
@@ -58,18 +58,15 @@ internalEnumLC <- function(qrObj, ...)
 
 
 #' Determine linear combinations in a matrix
-#' 
+#'
 #' Enumerate and resolve the linear combinations in a numeric matrix
-#' 
+#'
 #' The QR decomposition is used to determine if the matrix is full rank and
 #' then identify the sets of columns that are involved in the dependencies.
-#' 
+#'
 #' To "resolve" them, columns are iteratively removed and the matrix rank is
 #' rechecked.
-#' 
-#' The \code{\link[subselect:trim.matrix]{trim.matrix}} function in the
-#' \pkg{subselect} package can also be used to accomplish the same goal.
-#' 
+#'
 #' @param x a numeric matrix
 #' @return a list with elements: \item{linearCombos }{If there are linear
 #' combinations, this will be a list with elements for each dependency that
@@ -80,7 +77,7 @@ internalEnumLC <- function(qrObj, ...)
 #' @seealso \code{\link[subselect:trim.matrix]{trim.matrix}}
 #' @keywords manip
 #' @examples
-#' 
+#'
 #' testData1 <- matrix(0, nrow=20, ncol=8)
 #' testData1[,1] <- 1
 #' testData1[,2] <- round(rnorm(20), 1)
@@ -90,9 +87,9 @@ internalEnumLC <- function(qrObj, ...)
 #' testData1[1:4,6] <- 1
 #' testData1[5:10,7] <- 1
 #' testData1[11:20,8] <- 1
-#' 
+#'
 #' findLinearCombos(testData1)
-#' 
+#'
 #' testData2 <- matrix(0, nrow=6, ncol=6)
 #' testData2[,1] <- c(1, 1, 1, 1, 1, 1)
 #' testData2[,2] <- c(1, 1, 1, 0, 0, 0)
@@ -100,9 +97,9 @@ internalEnumLC <- function(qrObj, ...)
 #' testData2[,4] <- c(1, 0, 0, 1, 0, 0)
 #' testData2[,5] <- c(0, 1, 0, 0, 1, 0)
 #' testData2[,6] <- c(0, 0, 1, 0, 0, 1)
-#' 
+#'
 #' findLinearCombos(testData2)
-#' 
+#'
 #' @export findLinearCombos
 findLinearCombos <- function(x)
 {
@@ -116,7 +113,7 @@ findLinearCombos <- function(x)
       while(continue)
       {
          # keep removing linear dependencies until it resolves
-         tmp <- unlist(lapply(lcList, function(x) x[1]))   
+         tmp <- unlist(lapply(lcList, function(x) x[1]))
          tmp <- unique(tmp[!is.na(tmp)])
          badList <- unique(c(tmp, badList))
          lcList <- enumLC(x[,-badList, drop = FALSE])
diff --git a/pkg/caret/inst/NEWS.Rd b/pkg/caret/inst/NEWS.Rd
index df7c85bfc..9420a82a4 100644
--- a/pkg/caret/inst/NEWS.Rd
+++ b/pkg/caret/inst/NEWS.Rd
@@ -93,7 +93,7 @@
   \itemize{
     \item A new version was requested by CRAN since en dashes were used in the documentation.
     \item A bug was fixed where, for some recipes that involve class imbalance sampling, the resampling indicies were computed incorrectly \issue{1030}.
-    \item code{train} now removes duplicate models in the tuning grid. Duplicates could occur for models with discrete parameters.
+    \item \code{train} now removes duplicate models in the tuning grid. Duplicates could occur for models with discrete parameters.
   }
 }
 
diff --git a/pkg/caret/man/calibration.Rd b/pkg/caret/man/calibration.Rd
index 13fbb21d5..c73071fb1 100644
--- a/pkg/caret/man/calibration.Rd
+++ b/pkg/caret/man/calibration.Rd
@@ -43,8 +43,8 @@ used in \code{calibration.formula}).}
 \code{envir} argument in \code{eval}, e.g., a list or an environment) containing values for any
 variables in the formula, as well as \code{groups} and \code{subset} if applicable. If not found in
 \code{data}, or if \code{data} is unspecified, the variables are looked for in the environment of the
-formula. This argument is not used for \code{xyplot.calibration}. For {ggplot.calibration}, \code{data}
-should be an object of class "\code{calibration}"."}
+formula. This argument is not used for \code{xyplot.calibration}. For \code{ggplot.calibration}, \code{data}
+should be an object of class "\code{calibration}".}
 
 \item{class}{a character string for the class of interest}
 
diff --git a/pkg/caret/man/findCorrelation.Rd b/pkg/caret/man/findCorrelation.Rd
index fe3a0827d..c4ae26753 100644
--- a/pkg/caret/man/findCorrelation.Rd
+++ b/pkg/caret/man/findCorrelation.Rd
@@ -46,18 +46,12 @@ correlations at each step while \code{exact = FALSE} uses all the
 correlations regardless of whether they have been eliminated or not. The
 exact calculations will remove a smaller number of predictors but can be
 much slower when the problem dimensions are "big".
-
-There are several function in the \pkg{subselect} package
-(\code{\link[subselect:eleaps]{leaps}},
-\code{\link[subselect:genetic]{genetic}},
-\code{\link[subselect:anneal]{anneal}}) that can also be used to accomplish
-the same goal but tend to retain more predictors.
 }
 \examples{
 
-R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32, 
+R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32,
                   0.56, 0.01, 1, 0.65, 0.91, 0.32, 0.74, 0.65, 1, 0.36,
-                  0.85, 0.32, 0.91, 0.36, 1), 
+                  0.85, 0.32, 0.91, 0.36, 1),
                 .Dim = c(5L, 5L))
 colnames(R1) <- rownames(R1) <- paste0("x", 1:ncol(R1))
 R1
diff --git a/pkg/caret/man/findLinearCombos.Rd b/pkg/caret/man/findLinearCombos.Rd
index ffa01ed12..27c79c069 100644
--- a/pkg/caret/man/findLinearCombos.Rd
+++ b/pkg/caret/man/findLinearCombos.Rd
@@ -24,9 +24,6 @@ then identify the sets of columns that are involved in the dependencies.
 
 To "resolve" them, columns are iteratively removed and the matrix rank is
 rechecked.
-
-The \code{\link[subselect:trim.matrix]{trim.matrix}} function in the
-\pkg{subselect} package can also be used to accomplish the same goal.
 }
 \examples{